Re: [PATCH 1/7] mm: allocate mm_cpumask dynamically based on nr_cpu_ids
On Mon, Jul 16, 2018 at 03:03:31PM -0400, Rik van Riel wrote: > The mm_struct always contains a cpumask bitmap, regardless of > CONFIG_CPUMASK_OFFSTACK. That means the first step can be to > simplify things, and simply have one bitmask at the end of the > mm_struct for the mm_cpumask. > > This does necessitate moving everything else in mm_struct into > an anonymous sub-structure, which can be randomized when struct > randomization is enabled. > > The second step is to determine the correct size for the > mm_struct slab object from the size of the mm_struct > (excluding the cpu bitmap) and the size the cpumask. > > For init_mm we can simply allocate the maximum size this > kernel is compiled for, since we only have one init_mm > in the system, anyway. > > Pointer magic by Mike Galbraith, to evade -Wstringop-overflow > getting confused by the dynamically sized array. > > Signed-off-by: Rik van Riel > Signed-off-by: Mike Galbraith > Signed-off-by: Rik van Riel > Acked-by: Dave Hansen > Tested-by: Song Liu Hi, this patch causes unicore32 build failures. In file included from include/linux/mm.h:17, from arch/unicore32/kernel/asm-offsets.c:17: include/linux/mm_types.h:497: error: flexible array member in otherwise empty struct Build reference: next-20180803 gcc version: unicore32-linux-gcc (UC4_1.0.5_20100917) 4.4.2 I understand this is an old compiler, but it is the only available version as far as I know. Guenter
Re: [PATCH 1/7] mm: allocate mm_cpumask dynamically based on nr_cpu_ids
On Mon, Jul 16, 2018 at 03:03:31PM -0400, Rik van Riel wrote: > The mm_struct always contains a cpumask bitmap, regardless of > CONFIG_CPUMASK_OFFSTACK. That means the first step can be to > simplify things, and simply have one bitmask at the end of the > mm_struct for the mm_cpumask. > > This does necessitate moving everything else in mm_struct into > an anonymous sub-structure, which can be randomized when struct > randomization is enabled. > > The second step is to determine the correct size for the > mm_struct slab object from the size of the mm_struct > (excluding the cpu bitmap) and the size the cpumask. > > For init_mm we can simply allocate the maximum size this > kernel is compiled for, since we only have one init_mm > in the system, anyway. > > Pointer magic by Mike Galbraith, to evade -Wstringop-overflow > getting confused by the dynamically sized array. > > Signed-off-by: Rik van Riel > Signed-off-by: Mike Galbraith > Signed-off-by: Rik van Riel > Acked-by: Dave Hansen > Tested-by: Song Liu Hi, this patch causes unicore32 build failures. In file included from include/linux/mm.h:17, from arch/unicore32/kernel/asm-offsets.c:17: include/linux/mm_types.h:497: error: flexible array member in otherwise empty struct Build reference: next-20180803 gcc version: unicore32-linux-gcc (UC4_1.0.5_20100917) 4.4.2 I understand this is an old compiler, but it is the only available version as far as I know. Guenter
[PATCH 1/7] mm: allocate mm_cpumask dynamically based on nr_cpu_ids
The mm_struct always contains a cpumask bitmap, regardless of CONFIG_CPUMASK_OFFSTACK. That means the first step can be to simplify things, and simply have one bitmask at the end of the mm_struct for the mm_cpumask. This does necessitate moving everything else in mm_struct into an anonymous sub-structure, which can be randomized when struct randomization is enabled. The second step is to determine the correct size for the mm_struct slab object from the size of the mm_struct (excluding the cpu bitmap) and the size the cpumask. For init_mm we can simply allocate the maximum size this kernel is compiled for, since we only have one init_mm in the system, anyway. Pointer magic by Mike Galbraith, to evade -Wstringop-overflow getting confused by the dynamically sized array. Signed-off-by: Rik van Riel Signed-off-by: Mike Galbraith Signed-off-by: Rik van Riel Acked-by: Dave Hansen Tested-by: Song Liu --- drivers/firmware/efi/efi.c | 1 + include/linux/mm_types.h | 241 +++-- kernel/fork.c | 15 +-- mm/init-mm.c | 11 +++ 4 files changed, 145 insertions(+), 123 deletions(-) diff --git a/drivers/firmware/efi/efi.c b/drivers/firmware/efi/efi.c index 232f4915223b..7f0b19410a95 100644 --- a/drivers/firmware/efi/efi.c +++ b/drivers/firmware/efi/efi.c @@ -82,6 +82,7 @@ struct mm_struct efi_mm = { .mmap_sem = __RWSEM_INITIALIZER(efi_mm.mmap_sem), .page_table_lock= __SPIN_LOCK_UNLOCKED(efi_mm.page_table_lock), .mmlist = LIST_HEAD_INIT(efi_mm.mmlist), + .cpu_bitmap = { [BITS_TO_LONGS(NR_CPUS)] = 0}, }; static bool disable_runtime; diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 99ce070e7dcb..efdc24dd9e97 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -335,176 +335,183 @@ struct core_state { struct kioctx_table; struct mm_struct { - struct vm_area_struct *mmap;/* list of VMAs */ - struct rb_root mm_rb; - u32 vmacache_seqnum; /* per-thread vmacache */ + struct { + struct vm_area_struct *mmap;/* list of VMAs */ + struct rb_root mm_rb; + u32 
vmacache_seqnum; /* per-thread vmacache */ #ifdef CONFIG_MMU - unsigned long (*get_unmapped_area) (struct file *filp, + unsigned long (*get_unmapped_area) (struct file *filp, unsigned long addr, unsigned long len, unsigned long pgoff, unsigned long flags); #endif - unsigned long mmap_base;/* base of mmap area */ - unsigned long mmap_legacy_base; /* base of mmap area in bottom-up allocations */ + unsigned long mmap_base;/* base of mmap area */ + unsigned long mmap_legacy_base; /* base of mmap area in bottom-up allocations */ #ifdef CONFIG_HAVE_ARCH_COMPAT_MMAP_BASES - /* Base adresses for compatible mmap() */ - unsigned long mmap_compat_base; - unsigned long mmap_compat_legacy_base; + /* Base adresses for compatible mmap() */ + unsigned long mmap_compat_base; + unsigned long mmap_compat_legacy_base; #endif - unsigned long task_size;/* size of task vm space */ - unsigned long highest_vm_end; /* highest vma end address */ - pgd_t * pgd; - - /** -* @mm_users: The number of users including userspace. -* -* Use mmget()/mmget_not_zero()/mmput() to modify. When this drops -* to 0 (i.e. when the task exits and there are no other temporary -* reference holders), we also release a reference on @mm_count -* (which may then free the mm_struct if @mm_count also -* drops to 0). -*/ - atomic_t mm_users; - - /** -* @mm_count: The number of references to mm_struct -* (@mm_users count as 1). -* -* Use mmgrab()/mmdrop() to modify. When this drops to 0, the -* mm_struct is freed. -*/ - atomic_t mm_count; + unsigned long task_size;/* size of task vm space */ + unsigned long highest_vm_end; /* highest vma end address */ + pgd_t * pgd; + + /** +* @mm_users: The number of users including userspace. +* +* Use mmget()/mmget_not_zero()/mmput() to modify. When this +* drops to 0 (i.e. when the task exits and there are no other +* temporary reference holders), we also release a reference on +* @mm_count (which may then free the mm_struct if +* @mm_count also drops to 0). 
+*/ + atomic_t mm_users; + + /** +* @mm_count: The number of references to
[PATCH 1/7] mm: allocate mm_cpumask dynamically based on nr_cpu_ids
The mm_struct always contains a cpumask bitmap, regardless of CONFIG_CPUMASK_OFFSTACK. That means the first step can be to simplify things, and simply have one bitmask at the end of the mm_struct for the mm_cpumask. This does necessitate moving everything else in mm_struct into an anonymous sub-structure, which can be randomized when struct randomization is enabled. The second step is to determine the correct size for the mm_struct slab object from the size of the mm_struct (excluding the cpu bitmap) and the size the cpumask. For init_mm we can simply allocate the maximum size this kernel is compiled for, since we only have one init_mm in the system, anyway. Pointer magic by Mike Galbraith, to evade -Wstringop-overflow getting confused by the dynamically sized array. Signed-off-by: Rik van Riel Signed-off-by: Mike Galbraith Signed-off-by: Rik van Riel Acked-by: Dave Hansen Tested-by: Song Liu --- drivers/firmware/efi/efi.c | 1 + include/linux/mm_types.h | 241 +++-- kernel/fork.c | 15 +-- mm/init-mm.c | 11 +++ 4 files changed, 145 insertions(+), 123 deletions(-) diff --git a/drivers/firmware/efi/efi.c b/drivers/firmware/efi/efi.c index 232f4915223b..7f0b19410a95 100644 --- a/drivers/firmware/efi/efi.c +++ b/drivers/firmware/efi/efi.c @@ -82,6 +82,7 @@ struct mm_struct efi_mm = { .mmap_sem = __RWSEM_INITIALIZER(efi_mm.mmap_sem), .page_table_lock= __SPIN_LOCK_UNLOCKED(efi_mm.page_table_lock), .mmlist = LIST_HEAD_INIT(efi_mm.mmlist), + .cpu_bitmap = { [BITS_TO_LONGS(NR_CPUS)] = 0}, }; static bool disable_runtime; diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 99ce070e7dcb..efdc24dd9e97 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -335,176 +335,183 @@ struct core_state { struct kioctx_table; struct mm_struct { - struct vm_area_struct *mmap;/* list of VMAs */ - struct rb_root mm_rb; - u32 vmacache_seqnum; /* per-thread vmacache */ + struct { + struct vm_area_struct *mmap;/* list of VMAs */ + struct rb_root mm_rb; + u32 
vmacache_seqnum; /* per-thread vmacache */ #ifdef CONFIG_MMU - unsigned long (*get_unmapped_area) (struct file *filp, + unsigned long (*get_unmapped_area) (struct file *filp, unsigned long addr, unsigned long len, unsigned long pgoff, unsigned long flags); #endif - unsigned long mmap_base;/* base of mmap area */ - unsigned long mmap_legacy_base; /* base of mmap area in bottom-up allocations */ + unsigned long mmap_base;/* base of mmap area */ + unsigned long mmap_legacy_base; /* base of mmap area in bottom-up allocations */ #ifdef CONFIG_HAVE_ARCH_COMPAT_MMAP_BASES - /* Base adresses for compatible mmap() */ - unsigned long mmap_compat_base; - unsigned long mmap_compat_legacy_base; + /* Base adresses for compatible mmap() */ + unsigned long mmap_compat_base; + unsigned long mmap_compat_legacy_base; #endif - unsigned long task_size;/* size of task vm space */ - unsigned long highest_vm_end; /* highest vma end address */ - pgd_t * pgd; - - /** -* @mm_users: The number of users including userspace. -* -* Use mmget()/mmget_not_zero()/mmput() to modify. When this drops -* to 0 (i.e. when the task exits and there are no other temporary -* reference holders), we also release a reference on @mm_count -* (which may then free the mm_struct if @mm_count also -* drops to 0). -*/ - atomic_t mm_users; - - /** -* @mm_count: The number of references to mm_struct -* (@mm_users count as 1). -* -* Use mmgrab()/mmdrop() to modify. When this drops to 0, the -* mm_struct is freed. -*/ - atomic_t mm_count; + unsigned long task_size;/* size of task vm space */ + unsigned long highest_vm_end; /* highest vma end address */ + pgd_t * pgd; + + /** +* @mm_users: The number of users including userspace. +* +* Use mmget()/mmget_not_zero()/mmput() to modify. When this +* drops to 0 (i.e. when the task exits and there are no other +* temporary reference holders), we also release a reference on +* @mm_count (which may then free the mm_struct if +* @mm_count also drops to 0). 
+*/ + atomic_t mm_users; + + /** +* @mm_count: The number of references to
Re: [PATCH 1/7] mm: allocate mm_cpumask dynamically based on nr_cpu_ids
* Rik van Riel wrote: > On Mon, 2018-07-16 at 00:59 +0200, Ingo Molnar wrote: > > * Rik van Riel wrote: > > > > > The mm_struct always contains a cpumask bitmap, regardless of > > > CONFIG_CPUMASK_OFFSTACK. That means the first step can be to > > > simplify things, and simply have one bitmask at the end of the > > > mm_struct for the mm_cpumask. > > > > > > This does necessitate moving everything else in mm_struct into > > > an anonymous sub-structure, which can be randomized when struct > > > randomization is enabled. > > > > > > The second step is to determine the correct size for the > > > mm_struct slab object from the size of the mm_struct > > > (excluding the cpu bitmap) and the size the cpumask. > > > > > > For init_mm we can simply allocate the maximum size this > > > kernel is compiled for, since we only have one init_mm > > > in the system, anyway. > > > > > > Pointer magic by Mike Galbraith, to evade -Wstringop-overflow > > > getting confused by the dynamically sized array. > > > > > > Signed-off-by: Rik van Riel > > > Signed-off-by: Mike Galbraith > > > > Is this an Acked-by in disguise, or did this patch route via Mike? > > Mike found an issue with the patch and sent a > fix, so I added his S-o-b to this patch as > well. Makes sense - I'd suggest such a SoB chain: Signed-off-by: Rik van Riel [ Fixed crash. ] Signed-off-by: Mike Galbraith Signed-off-by: Rik van Riel ... it's a bit non-standard but we've used it in similar cases and it makes the routing and evolution of the patch pretty clear. Thanks, Ingo
Re: [PATCH 1/7] mm: allocate mm_cpumask dynamically based on nr_cpu_ids
* Rik van Riel wrote: > On Mon, 2018-07-16 at 00:59 +0200, Ingo Molnar wrote: > > * Rik van Riel wrote: > > > > > The mm_struct always contains a cpumask bitmap, regardless of > > > CONFIG_CPUMASK_OFFSTACK. That means the first step can be to > > > simplify things, and simply have one bitmask at the end of the > > > mm_struct for the mm_cpumask. > > > > > > This does necessitate moving everything else in mm_struct into > > > an anonymous sub-structure, which can be randomized when struct > > > randomization is enabled. > > > > > > The second step is to determine the correct size for the > > > mm_struct slab object from the size of the mm_struct > > > (excluding the cpu bitmap) and the size the cpumask. > > > > > > For init_mm we can simply allocate the maximum size this > > > kernel is compiled for, since we only have one init_mm > > > in the system, anyway. > > > > > > Pointer magic by Mike Galbraith, to evade -Wstringop-overflow > > > getting confused by the dynamically sized array. > > > > > > Signed-off-by: Rik van Riel > > > Signed-off-by: Mike Galbraith > > > > Is this an Acked-by in disguise, or did this patch route via Mike? > > Mike found an issue with the patch and sent a > fix, so I added his S-o-b to this patch as > well. Makes sense - I'd suggest such a SoB chain: Signed-off-by: Rik van Riel [ Fixed crash. ] Signed-off-by: Mike Galbraith Signed-off-by: Rik van Riel ... it's a bit non-standard but we've used it in similar cases and it makes the routing and evolution of the patch pretty clear. Thanks, Ingo
Re: [PATCH 1/7] mm: allocate mm_cpumask dynamically based on nr_cpu_ids
On Mon, 2018-07-16 at 00:59 +0200, Ingo Molnar wrote: > * Rik van Riel wrote: > > > The mm_struct always contains a cpumask bitmap, regardless of > > CONFIG_CPUMASK_OFFSTACK. That means the first step can be to > > simplify things, and simply have one bitmask at the end of the > > mm_struct for the mm_cpumask. > > > > This does necessitate moving everything else in mm_struct into > > an anonymous sub-structure, which can be randomized when struct > > randomization is enabled. > > > > The second step is to determine the correct size for the > > mm_struct slab object from the size of the mm_struct > > (excluding the cpu bitmap) and the size the cpumask. > > > > For init_mm we can simply allocate the maximum size this > > kernel is compiled for, since we only have one init_mm > > in the system, anyway. > > > > Pointer magic by Mike Galbraith, to evade -Wstringop-overflow > > getting confused by the dynamically sized array. > > > > Signed-off-by: Rik van Riel > > Signed-off-by: Mike Galbraith > > Is this an Acked-by in disguise, or did this patch route via Mike? Mike found an issue with the patch and sent a fix, so I added his S-o-b to this patch as well. -- All Rights Reversed. signature.asc Description: This is a digitally signed message part
Re: [PATCH 1/7] mm: allocate mm_cpumask dynamically based on nr_cpu_ids
On Mon, 2018-07-16 at 00:59 +0200, Ingo Molnar wrote: > * Rik van Riel wrote: > > > The mm_struct always contains a cpumask bitmap, regardless of > > CONFIG_CPUMASK_OFFSTACK. That means the first step can be to > > simplify things, and simply have one bitmask at the end of the > > mm_struct for the mm_cpumask. > > > > This does necessitate moving everything else in mm_struct into > > an anonymous sub-structure, which can be randomized when struct > > randomization is enabled. > > > > The second step is to determine the correct size for the > > mm_struct slab object from the size of the mm_struct > > (excluding the cpu bitmap) and the size the cpumask. > > > > For init_mm we can simply allocate the maximum size this > > kernel is compiled for, since we only have one init_mm > > in the system, anyway. > > > > Pointer magic by Mike Galbraith, to evade -Wstringop-overflow > > getting confused by the dynamically sized array. > > > > Signed-off-by: Rik van Riel > > Signed-off-by: Mike Galbraith > > Is this an Acked-by in disguise, or did this patch route via Mike? Mike found an issue with the patch and sent a fix, so I added his S-o-b to this patch as well. -- All Rights Reversed. signature.asc Description: This is a digitally signed message part
Re: [PATCH 1/7] mm: allocate mm_cpumask dynamically based on nr_cpu_ids
* Rik van Riel wrote: > The mm_struct always contains a cpumask bitmap, regardless of > CONFIG_CPUMASK_OFFSTACK. That means the first step can be to > simplify things, and simply have one bitmask at the end of the > mm_struct for the mm_cpumask. > > This does necessitate moving everything else in mm_struct into > an anonymous sub-structure, which can be randomized when struct > randomization is enabled. > > The second step is to determine the correct size for the > mm_struct slab object from the size of the mm_struct > (excluding the cpu bitmap) and the size the cpumask. > > For init_mm we can simply allocate the maximum size this > kernel is compiled for, since we only have one init_mm > in the system, anyway. > > Pointer magic by Mike Galbraith, to evade -Wstringop-overflow > getting confused by the dynamically sized array. > > Signed-off-by: Rik van Riel > Signed-off-by: Mike Galbraith Is this an Acked-by in disguise, or did this patch route via Mike? Thanks, Ingo
Re: [PATCH 1/7] mm: allocate mm_cpumask dynamically based on nr_cpu_ids
* Rik van Riel wrote: > The mm_struct always contains a cpumask bitmap, regardless of > CONFIG_CPUMASK_OFFSTACK. That means the first step can be to > simplify things, and simply have one bitmask at the end of the > mm_struct for the mm_cpumask. > > This does necessitate moving everything else in mm_struct into > an anonymous sub-structure, which can be randomized when struct > randomization is enabled. > > The second step is to determine the correct size for the > mm_struct slab object from the size of the mm_struct > (excluding the cpu bitmap) and the size the cpumask. > > For init_mm we can simply allocate the maximum size this > kernel is compiled for, since we only have one init_mm > in the system, anyway. > > Pointer magic by Mike Galbraith, to evade -Wstringop-overflow > getting confused by the dynamically sized array. > > Signed-off-by: Rik van Riel > Signed-off-by: Mike Galbraith Is this an Acked-by in disguise, or did this patch route via Mike? Thanks, Ingo
[PATCH 1/7] mm: allocate mm_cpumask dynamically based on nr_cpu_ids
The mm_struct always contains a cpumask bitmap, regardless of CONFIG_CPUMASK_OFFSTACK. That means the first step can be to simplify things, and simply have one bitmask at the end of the mm_struct for the mm_cpumask. This does necessitate moving everything else in mm_struct into an anonymous sub-structure, which can be randomized when struct randomization is enabled. The second step is to determine the correct size for the mm_struct slab object from the size of the mm_struct (excluding the cpu bitmap) and the size the cpumask. For init_mm we can simply allocate the maximum size this kernel is compiled for, since we only have one init_mm in the system, anyway. Pointer magic by Mike Galbraith, to evade -Wstringop-overflow getting confused by the dynamically sized array. Signed-off-by: Rik van Riel Signed-off-by: Mike Galbraith Acked-by: Dave Hansen Tested-by: Song Liu --- drivers/firmware/efi/efi.c | 1 + include/linux/mm_types.h | 241 +++-- kernel/fork.c | 15 +-- mm/init-mm.c | 11 +++ 4 files changed, 145 insertions(+), 123 deletions(-) diff --git a/drivers/firmware/efi/efi.c b/drivers/firmware/efi/efi.c index 232f4915223b..7f0b19410a95 100644 --- a/drivers/firmware/efi/efi.c +++ b/drivers/firmware/efi/efi.c @@ -82,6 +82,7 @@ struct mm_struct efi_mm = { .mmap_sem = __RWSEM_INITIALIZER(efi_mm.mmap_sem), .page_table_lock= __SPIN_LOCK_UNLOCKED(efi_mm.page_table_lock), .mmlist = LIST_HEAD_INIT(efi_mm.mmlist), + .cpu_bitmap = { [BITS_TO_LONGS(NR_CPUS)] = 0}, }; static bool disable_runtime; diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 99ce070e7dcb..efdc24dd9e97 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -335,176 +335,183 @@ struct core_state { struct kioctx_table; struct mm_struct { - struct vm_area_struct *mmap;/* list of VMAs */ - struct rb_root mm_rb; - u32 vmacache_seqnum; /* per-thread vmacache */ + struct { + struct vm_area_struct *mmap;/* list of VMAs */ + struct rb_root mm_rb; + u32 vmacache_seqnum; /* 
per-thread vmacache */ #ifdef CONFIG_MMU - unsigned long (*get_unmapped_area) (struct file *filp, + unsigned long (*get_unmapped_area) (struct file *filp, unsigned long addr, unsigned long len, unsigned long pgoff, unsigned long flags); #endif - unsigned long mmap_base;/* base of mmap area */ - unsigned long mmap_legacy_base; /* base of mmap area in bottom-up allocations */ + unsigned long mmap_base;/* base of mmap area */ + unsigned long mmap_legacy_base; /* base of mmap area in bottom-up allocations */ #ifdef CONFIG_HAVE_ARCH_COMPAT_MMAP_BASES - /* Base adresses for compatible mmap() */ - unsigned long mmap_compat_base; - unsigned long mmap_compat_legacy_base; + /* Base adresses for compatible mmap() */ + unsigned long mmap_compat_base; + unsigned long mmap_compat_legacy_base; #endif - unsigned long task_size;/* size of task vm space */ - unsigned long highest_vm_end; /* highest vma end address */ - pgd_t * pgd; - - /** -* @mm_users: The number of users including userspace. -* -* Use mmget()/mmget_not_zero()/mmput() to modify. When this drops -* to 0 (i.e. when the task exits and there are no other temporary -* reference holders), we also release a reference on @mm_count -* (which may then free the mm_struct if @mm_count also -* drops to 0). -*/ - atomic_t mm_users; - - /** -* @mm_count: The number of references to mm_struct -* (@mm_users count as 1). -* -* Use mmgrab()/mmdrop() to modify. When this drops to 0, the -* mm_struct is freed. -*/ - atomic_t mm_count; + unsigned long task_size;/* size of task vm space */ + unsigned long highest_vm_end; /* highest vma end address */ + pgd_t * pgd; + + /** +* @mm_users: The number of users including userspace. +* +* Use mmget()/mmget_not_zero()/mmput() to modify. When this +* drops to 0 (i.e. when the task exits and there are no other +* temporary reference holders), we also release a reference on +* @mm_count (which may then free the mm_struct if +* @mm_count also drops to 0). 
+*/ + atomic_t mm_users; + + /** +* @mm_count: The number of references to mm_struct +*
[PATCH 1/7] mm: allocate mm_cpumask dynamically based on nr_cpu_ids
The mm_struct always contains a cpumask bitmap, regardless of CONFIG_CPUMASK_OFFSTACK. That means the first step can be to simplify things, and simply have one bitmask at the end of the mm_struct for the mm_cpumask. This does necessitate moving everything else in mm_struct into an anonymous sub-structure, which can be randomized when struct randomization is enabled. The second step is to determine the correct size for the mm_struct slab object from the size of the mm_struct (excluding the cpu bitmap) and the size the cpumask. For init_mm we can simply allocate the maximum size this kernel is compiled for, since we only have one init_mm in the system, anyway. Pointer magic by Mike Galbraith, to evade -Wstringop-overflow getting confused by the dynamically sized array. Signed-off-by: Rik van Riel Signed-off-by: Mike Galbraith Acked-by: Dave Hansen Tested-by: Song Liu --- drivers/firmware/efi/efi.c | 1 + include/linux/mm_types.h | 241 +++-- kernel/fork.c | 15 +-- mm/init-mm.c | 11 +++ 4 files changed, 145 insertions(+), 123 deletions(-) diff --git a/drivers/firmware/efi/efi.c b/drivers/firmware/efi/efi.c index 232f4915223b..7f0b19410a95 100644 --- a/drivers/firmware/efi/efi.c +++ b/drivers/firmware/efi/efi.c @@ -82,6 +82,7 @@ struct mm_struct efi_mm = { .mmap_sem = __RWSEM_INITIALIZER(efi_mm.mmap_sem), .page_table_lock= __SPIN_LOCK_UNLOCKED(efi_mm.page_table_lock), .mmlist = LIST_HEAD_INIT(efi_mm.mmlist), + .cpu_bitmap = { [BITS_TO_LONGS(NR_CPUS)] = 0}, }; static bool disable_runtime; diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 99ce070e7dcb..efdc24dd9e97 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -335,176 +335,183 @@ struct core_state { struct kioctx_table; struct mm_struct { - struct vm_area_struct *mmap;/* list of VMAs */ - struct rb_root mm_rb; - u32 vmacache_seqnum; /* per-thread vmacache */ + struct { + struct vm_area_struct *mmap;/* list of VMAs */ + struct rb_root mm_rb; + u32 vmacache_seqnum; /* 
per-thread vmacache */ #ifdef CONFIG_MMU - unsigned long (*get_unmapped_area) (struct file *filp, + unsigned long (*get_unmapped_area) (struct file *filp, unsigned long addr, unsigned long len, unsigned long pgoff, unsigned long flags); #endif - unsigned long mmap_base;/* base of mmap area */ - unsigned long mmap_legacy_base; /* base of mmap area in bottom-up allocations */ + unsigned long mmap_base;/* base of mmap area */ + unsigned long mmap_legacy_base; /* base of mmap area in bottom-up allocations */ #ifdef CONFIG_HAVE_ARCH_COMPAT_MMAP_BASES - /* Base adresses for compatible mmap() */ - unsigned long mmap_compat_base; - unsigned long mmap_compat_legacy_base; + /* Base adresses for compatible mmap() */ + unsigned long mmap_compat_base; + unsigned long mmap_compat_legacy_base; #endif - unsigned long task_size;/* size of task vm space */ - unsigned long highest_vm_end; /* highest vma end address */ - pgd_t * pgd; - - /** -* @mm_users: The number of users including userspace. -* -* Use mmget()/mmget_not_zero()/mmput() to modify. When this drops -* to 0 (i.e. when the task exits and there are no other temporary -* reference holders), we also release a reference on @mm_count -* (which may then free the mm_struct if @mm_count also -* drops to 0). -*/ - atomic_t mm_users; - - /** -* @mm_count: The number of references to mm_struct -* (@mm_users count as 1). -* -* Use mmgrab()/mmdrop() to modify. When this drops to 0, the -* mm_struct is freed. -*/ - atomic_t mm_count; + unsigned long task_size;/* size of task vm space */ + unsigned long highest_vm_end; /* highest vma end address */ + pgd_t * pgd; + + /** +* @mm_users: The number of users including userspace. +* +* Use mmget()/mmget_not_zero()/mmput() to modify. When this +* drops to 0 (i.e. when the task exits and there are no other +* temporary reference holders), we also release a reference on +* @mm_count (which may then free the mm_struct if +* @mm_count also drops to 0). 
+*/ + atomic_t mm_users; + + /** +* @mm_count: The number of references to mm_struct +*
Re: [PATCH 1/7] mm: allocate mm_cpumask dynamically based on nr_cpu_ids
On Mon, 2018-07-09 at 17:38 -0400, Rik van Riel wrote: > > I added your code, and Signed-off-By in patch > 1 for version 5 of the series. No objection, but no need (like taking credit for fixing a typo:).
Re: [PATCH 1/7] mm: allocate mm_cpumask dynamically based on nr_cpu_ids
On Mon, 2018-07-09 at 17:38 -0400, Rik van Riel wrote: > > I added your code, and Signed-off-By in patch > 1 for version 5 of the series. No objection, but no need (like taking credit for fixing a typo:).
Re: [PATCH 1/7] mm: allocate mm_cpumask dynamically based on nr_cpu_ids
On Sun, 2018-07-08 at 16:13 +0200, Mike Galbraith wrote: > On Sat, 2018-07-07 at 17:25 -0400, Rik van Riel wrote: > > > > > ./include/linux/bitmap.h:208:3: warning: ‘memset’ writing 64 > > > bytes > > > into a region of size 0 overflows the destination [-Wstringop- > > > overflow=] > > >memset(dst, 0, len); > > >^~~ > > > > I don't understand this one. > > > > Inside init_mm we have this line: > > .cpu_bitmap = { [BITS_TO_LONGS(NR_CPUS)] = 0}, > > > > which is the way the documentation suggests statically > > allocated variable size arrays should be allocated > > and initialized. > > > > How does that result in a memset of the same size, > > on the same array, to throw an error like above? > > Compiler knows that ->cpu_bitmap is 64 bits of storage, and with > !CPUMASK_OFFSTACK, nr_cpumask_bits = NR_CPUS. With NR_CPUS > 64, > compiler gripes, with NR_CPUS <= 64 it's a happy camper. > > > What am I doing wrong? > > Below is what I did to get box to both STHU, and to boot with the > openSUSE master branch config I sent. Without the efi_mm hunk, boot > hangs early with or without the other hunk. > > I build and boot tested the openSUSE config, a NOPREEMPT+MAXSMP > config, > my local config w. NR_CPUS=8, and master-rt w. NR_CPUS=256, which is > the only one that got any real exercise (building the others). > Thank you for tracking that down. I added your code, and Signed-off-By in patch 1 for version 5 of the series. -- All Rights Reversed. signature.asc Description: This is a digitally signed message part
Re: [PATCH 1/7] mm: allocate mm_cpumask dynamically based on nr_cpu_ids
On Sun, 2018-07-08 at 16:13 +0200, Mike Galbraith wrote: > On Sat, 2018-07-07 at 17:25 -0400, Rik van Riel wrote: > > > > > ./include/linux/bitmap.h:208:3: warning: ‘memset’ writing 64 > > > bytes > > > into a region of size 0 overflows the destination [-Wstringop- > > > overflow=] > > >memset(dst, 0, len); > > >^~~ > > > > I don't understand this one. > > > > Inside init_mm we have this line: > > .cpu_bitmap = { [BITS_TO_LONGS(NR_CPUS)] = 0}, > > > > which is the way the documentation suggests statically > > allocated variable size arrays should be allocated > > and initialized. > > > > How does that result in a memset of the same size, > > on the same array, to throw an error like above? > > Compiler knows that ->cpu_bitmap is 64 bits of storage, and with > !CPUMASK_OFFSTACK, nr_cpumask_bits = NR_CPUS. With NR_CPUS > 64, > compiler gripes, with NR_CPUS <= 64 it's a happy camper. > > > What am I doing wrong? > > Below is what I did to get box to both STHU, and to boot with the > openSUSE master branch config I sent. Without the efi_mm hunk, boot > hangs early with or without the other hunk. > > I build and boot tested the openSUSE config, a NOPREEMPT+MAXSMP > config, > my local config w. NR_CPUS=8, and master-rt w. NR_CPUS=256, which is > the only one that got any real exercise (building the others). > Thank you for tracking that down. I added your code, and Signed-off-By in patch 1 for version 5 of the series. -- All Rights Reversed. signature.asc Description: This is a digitally signed message part
Re: [PATCH 1/7] mm: allocate mm_cpumask dynamically based on nr_cpu_ids
BTW, a second gripe ala the first, but wrt mm_init_cpumask(&efi_mm): In function ‘bitmap_zero’, inlined from ‘cpumask_clear’ at ./include/linux/cpumask.h:378:2, inlined from ‘mm_init_cpumask’ at ./include/linux/mm_types.h:504:2, inlined from ‘efi_alloc_page_tables’ at arch/x86/platform/efi/efi_64.c:235:2: ./include/linux/bitmap.h:208:3: warning: ‘memset’ writing 64 bytes into a region of size 0 overflows the destination [-Wstringop-overflow=] memset(dst, 0, len); ^~~
Re: [PATCH 1/7] mm: allocate mm_cpumask dynamically based on nr_cpu_ids
BTW, a second gripe ala the first, but wrt mm_init_cpumask(&efi_mm): In function ‘bitmap_zero’, inlined from ‘cpumask_clear’ at ./include/linux/cpumask.h:378:2, inlined from ‘mm_init_cpumask’ at ./include/linux/mm_types.h:504:2, inlined from ‘efi_alloc_page_tables’ at arch/x86/platform/efi/efi_64.c:235:2: ./include/linux/bitmap.h:208:3: warning: ‘memset’ writing 64 bytes into a region of size 0 overflows the destination [-Wstringop-overflow=] memset(dst, 0, len); ^~~
Re: [PATCH 1/7] mm: allocate mm_cpumask dynamically based on nr_cpu_ids
On Sat, 2018-07-07 at 17:25 -0400, Rik van Riel wrote: > > > ./include/linux/bitmap.h:208:3: warning: ‘memset’ writing 64 bytes > > into a region of size 0 overflows the destination [-Wstringop- > > overflow=] > >memset(dst, 0, len); > >^~~ > > I don't understand this one. > > Inside init_mm we have this line: > .cpu_bitmap = { [BITS_TO_LONGS(NR_CPUS)] = 0}, > > which is the way the documentation suggests statically > allocated variable size arrays should be allocated > and initialized. > > How does that result in a memset of the same size, > on the same array, to throw an error like above? Compiler knows that ->cpu_bitmap is 64 bits of storage, and with !CPUMASK_OFFSTACK, nr_cpumask_bits = NR_CPUS. With NR_CPUS > 64, compiler gripes, with NR_CPUS <= 64 it's a happy camper. > What am I doing wrong? Below is what I did to get box to both STHU, and to boot with the openSUSE master branch config I sent. Without the efi_mm hunk, boot hangs early with or without the other hunk. I build and boot tested the openSUSE config, a NOPREEMPT+MAXSMP config, my local config w. NR_CPUS=8, and master-rt w. NR_CPUS=256, which is the only one that got any real exercise (building the others). 
--- drivers/firmware/efi/efi.c |1 + include/linux/mm_types.h |5 - 2 files changed, 5 insertions(+), 1 deletion(-) --- a/drivers/firmware/efi/efi.c +++ b/drivers/firmware/efi/efi.c @@ -82,6 +82,7 @@ struct mm_struct efi_mm = { .mmap_sem = __RWSEM_INITIALIZER(efi_mm.mmap_sem), .page_table_lock= __SPIN_LOCK_UNLOCKED(efi_mm.page_table_lock), .mmlist = LIST_HEAD_INIT(efi_mm.mmlist), + .cpu_bitmap = { [BITS_TO_LONGS(NR_CPUS)] = 0}, }; static bool disable_runtime; --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -501,7 +501,10 @@ extern struct mm_struct init_mm; static inline void mm_init_cpumask(struct mm_struct *mm) { - cpumask_clear((struct cpumask *)&mm->cpu_bitmap); + unsigned long cpu_bitmap = (unsigned long)mm; + + cpu_bitmap += offsetof(struct mm_struct, cpu_bitmap); + cpumask_clear((struct cpumask *)cpu_bitmap); } /* Future-safe accessor for struct mm_struct's cpu_vm_mask. */
Re: [PATCH 1/7] mm: allocate mm_cpumask dynamically based on nr_cpu_ids
On Sat, 2018-07-07 at 17:25 -0400, Rik van Riel wrote: > > > ./include/linux/bitmap.h:208:3: warning: ‘memset’ writing 64 bytes > > into a region of size 0 overflows the destination [-Wstringop- > > overflow=] > >memset(dst, 0, len); > >^~~ > > I don't understand this one. > > Inside init_mm we have this line: > .cpu_bitmap = { [BITS_TO_LONGS(NR_CPUS)] = 0}, > > which is the way the documentation suggests statically > allocated variable size arrays should be allocated > and initialized. > > How does that result in a memset of the same size, > on the same array, to throw an error like above? Compiler knows that ->cpu_bitmap is 64 bits of storage, and with !CPUMASK_OFFSTACK, nr_cpumask_bits = NR_CPUS. With NR_CPUS > 64, compiler gripes, with NR_CPUS <= 64 it's a happy camper. > What am I doing wrong? Below is what I did to get box to both STHU, and to boot with the openSUSE master branch config I sent. Without the efi_mm hunk, boot hangs early with or without the other hunk. I build and boot tested the openSUSE config, a NOPREEMPT+MAXSMP config, my local config w. NR_CPUS=8, and master-rt w. NR_CPUS=256, which is the only one that got any real exercise (building the others). 
--- drivers/firmware/efi/efi.c |1 + include/linux/mm_types.h |5 - 2 files changed, 5 insertions(+), 1 deletion(-) --- a/drivers/firmware/efi/efi.c +++ b/drivers/firmware/efi/efi.c @@ -82,6 +82,7 @@ struct mm_struct efi_mm = { .mmap_sem = __RWSEM_INITIALIZER(efi_mm.mmap_sem), .page_table_lock= __SPIN_LOCK_UNLOCKED(efi_mm.page_table_lock), .mmlist = LIST_HEAD_INIT(efi_mm.mmlist), + .cpu_bitmap = { [BITS_TO_LONGS(NR_CPUS)] = 0}, }; static bool disable_runtime; --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -501,7 +501,10 @@ extern struct mm_struct init_mm; static inline void mm_init_cpumask(struct mm_struct *mm) { - cpumask_clear((struct cpumask *)&mm->cpu_bitmap); + unsigned long cpu_bitmap = (unsigned long)mm; + + cpu_bitmap += offsetof(struct mm_struct, cpu_bitmap); + cpumask_clear((struct cpumask *)cpu_bitmap); } /* Future-safe accessor for struct mm_struct's cpu_vm_mask. */
Re: [PATCH 1/7] mm: allocate mm_cpumask dynamically based on nr_cpu_ids
I. On Sat, 2018-07-07 at 10:23 +0200, Mike Galbraith wrote: > On Fri, 2018-07-06 at 17:56 -0400, Rik van Riel wrote: > > The mm_struct always contains a cpumask bitmap, regardless of > > CONFIG_CPUMASK_OFFSTACK. That means the first step can be to > > simplify things, and simply have one bitmask at the end of the > > mm_struct for the mm_cpumask. > > Otherwise virgin master.today grumbles. > > CC kernel/bounds.s > UPD include/generated/timeconst.h > UPD include/generated/bounds.h > CC arch/x86/kernel/asm-offsets.s > UPD include/generated/asm-offsets.h > CALLscripts/checksyscalls.sh > CHK include/generated/compile.h > HOSTCC usr/gen_init_cpio > UPD include/generated/compile.h > CC init/main.o > In file included from ./include/linux/cpumask.h:12:0, > from ./arch/x86/include/asm/cpumask.h:5, > from ./arch/x86/include/asm/msr.h:11, > from ./arch/x86/include/asm/processor.h:21, > from ./arch/x86/include/asm/cpufeature.h:5, > from ./arch/x86/include/asm/thread_info.h:53, > from ./include/linux/thread_info.h:38, > from ./arch/x86/include/asm/preempt.h:7, > from ./include/linux/preempt.h:81, > from ./include/linux/spinlock.h:51, > from ./include/linux/seqlock.h:36, > from ./include/linux/time.h:6, > from ./include/linux/stat.h:19, > from ./include/linux/module.h:10, > from init/main.c:16: > In function ‘bitmap_zero’, > inlined from ‘cpumask_clear’ at ./include/linux/cpumask.h:378:2, > inlined from ‘mm_init_cpumask’ at > ./include/linux/mm_types.h:504:2, > inlined from ‘start_kernel’ at init/main.c:560:2: > ./include/linux/bitmap.h:208:3: warning: ‘memset’ writing 64 bytes > into a region of size 0 overflows the destination [-Wstringop- > overflow=] >memset(dst, 0, len); >^~~ I don't understand this one. Inside init_mm we have this line: .cpu_bitmap = { [BITS_TO_LONGS(NR_CPUS)] = 0}, which is the way the documentation suggests statically allocated variable size arrays should be allocated and initialized. 
How does that result in a memset of the same size, on the same array, to throw an error like above? What am I doing wrong? -- All Rights Reversed. signature.asc Description: This is a digitally signed message part
Re: [PATCH 1/7] mm: allocate mm_cpumask dynamically based on nr_cpu_ids
I. On Sat, 2018-07-07 at 10:23 +0200, Mike Galbraith wrote: > On Fri, 2018-07-06 at 17:56 -0400, Rik van Riel wrote: > > The mm_struct always contains a cpumask bitmap, regardless of > > CONFIG_CPUMASK_OFFSTACK. That means the first step can be to > > simplify things, and simply have one bitmask at the end of the > > mm_struct for the mm_cpumask. > > Otherwise virgin master.today grumbles. > > CC kernel/bounds.s > UPD include/generated/timeconst.h > UPD include/generated/bounds.h > CC arch/x86/kernel/asm-offsets.s > UPD include/generated/asm-offsets.h > CALLscripts/checksyscalls.sh > CHK include/generated/compile.h > HOSTCC usr/gen_init_cpio > UPD include/generated/compile.h > CC init/main.o > In file included from ./include/linux/cpumask.h:12:0, > from ./arch/x86/include/asm/cpumask.h:5, > from ./arch/x86/include/asm/msr.h:11, > from ./arch/x86/include/asm/processor.h:21, > from ./arch/x86/include/asm/cpufeature.h:5, > from ./arch/x86/include/asm/thread_info.h:53, > from ./include/linux/thread_info.h:38, > from ./arch/x86/include/asm/preempt.h:7, > from ./include/linux/preempt.h:81, > from ./include/linux/spinlock.h:51, > from ./include/linux/seqlock.h:36, > from ./include/linux/time.h:6, > from ./include/linux/stat.h:19, > from ./include/linux/module.h:10, > from init/main.c:16: > In function ‘bitmap_zero’, > inlined from ‘cpumask_clear’ at ./include/linux/cpumask.h:378:2, > inlined from ‘mm_init_cpumask’ at > ./include/linux/mm_types.h:504:2, > inlined from ‘start_kernel’ at init/main.c:560:2: > ./include/linux/bitmap.h:208:3: warning: ‘memset’ writing 64 bytes > into a region of size 0 overflows the destination [-Wstringop- > overflow=] >memset(dst, 0, len); >^~~ I don't understand this one. Inside init_mm we have this line: .cpu_bitmap = { [BITS_TO_LONGS(NR_CPUS)] = 0}, which is the way the documentation suggests statically allocated variable size arrays should be allocated and initialized. 
How does that result in a memset of the same size, on the same array, to throw an error like above? What am I doing wrong? -- All Rights Reversed. signature.asc Description: This is a digitally signed message part
Re: [PATCH 1/7] mm: allocate mm_cpumask dynamically based on nr_cpu_ids
On Fri, 2018-07-06 at 17:56 -0400, Rik van Riel wrote: > The mm_struct always contains a cpumask bitmap, regardless of > CONFIG_CPUMASK_OFFSTACK. That means the first step can be to > simplify things, and simply have one bitmask at the end of the > mm_struct for the mm_cpumask. Otherwise virgin master.today grumbles. CC kernel/bounds.s UPD include/generated/timeconst.h UPD include/generated/bounds.h CC arch/x86/kernel/asm-offsets.s UPD include/generated/asm-offsets.h CALLscripts/checksyscalls.sh CHK include/generated/compile.h HOSTCC usr/gen_init_cpio UPD include/generated/compile.h CC init/main.o In file included from ./include/linux/cpumask.h:12:0, from ./arch/x86/include/asm/cpumask.h:5, from ./arch/x86/include/asm/msr.h:11, from ./arch/x86/include/asm/processor.h:21, from ./arch/x86/include/asm/cpufeature.h:5, from ./arch/x86/include/asm/thread_info.h:53, from ./include/linux/thread_info.h:38, from ./arch/x86/include/asm/preempt.h:7, from ./include/linux/preempt.h:81, from ./include/linux/spinlock.h:51, from ./include/linux/seqlock.h:36, from ./include/linux/time.h:6, from ./include/linux/stat.h:19, from ./include/linux/module.h:10, from init/main.c:16: In function ‘bitmap_zero’, inlined from ‘cpumask_clear’ at ./include/linux/cpumask.h:378:2, inlined from ‘mm_init_cpumask’ at ./include/linux/mm_types.h:504:2, inlined from ‘start_kernel’ at init/main.c:560:2: ./include/linux/bitmap.h:208:3: warning: ‘memset’ writing 64 bytes into a region of size 0 overflows the destination [-Wstringop-overflow=] memset(dst, 0, len); ^~~ config.gz Description: application/gzip
Re: [PATCH 1/7] mm: allocate mm_cpumask dynamically based on nr_cpu_ids
On Fri, 2018-07-06 at 17:56 -0400, Rik van Riel wrote: > The mm_struct always contains a cpumask bitmap, regardless of > CONFIG_CPUMASK_OFFSTACK. That means the first step can be to > simplify things, and simply have one bitmask at the end of the > mm_struct for the mm_cpumask. Otherwise virgin master.today grumbles. CC kernel/bounds.s UPD include/generated/timeconst.h UPD include/generated/bounds.h CC arch/x86/kernel/asm-offsets.s UPD include/generated/asm-offsets.h CALLscripts/checksyscalls.sh CHK include/generated/compile.h HOSTCC usr/gen_init_cpio UPD include/generated/compile.h CC init/main.o In file included from ./include/linux/cpumask.h:12:0, from ./arch/x86/include/asm/cpumask.h:5, from ./arch/x86/include/asm/msr.h:11, from ./arch/x86/include/asm/processor.h:21, from ./arch/x86/include/asm/cpufeature.h:5, from ./arch/x86/include/asm/thread_info.h:53, from ./include/linux/thread_info.h:38, from ./arch/x86/include/asm/preempt.h:7, from ./include/linux/preempt.h:81, from ./include/linux/spinlock.h:51, from ./include/linux/seqlock.h:36, from ./include/linux/time.h:6, from ./include/linux/stat.h:19, from ./include/linux/module.h:10, from init/main.c:16: In function ‘bitmap_zero’, inlined from ‘cpumask_clear’ at ./include/linux/cpumask.h:378:2, inlined from ‘mm_init_cpumask’ at ./include/linux/mm_types.h:504:2, inlined from ‘start_kernel’ at init/main.c:560:2: ./include/linux/bitmap.h:208:3: warning: ‘memset’ writing 64 bytes into a region of size 0 overflows the destination [-Wstringop-overflow=] memset(dst, 0, len); ^~~ config.gz Description: application/gzip
[PATCH 1/7] mm: allocate mm_cpumask dynamically based on nr_cpu_ids
The mm_struct always contains a cpumask bitmap, regardless of CONFIG_CPUMASK_OFFSTACK. That means the first step can be to simplify things, and simply have one bitmask at the end of the mm_struct for the mm_cpumask. This does necessitate moving everything else in mm_struct into an anonymous sub-structure, which can be randomized when struct randomization is enabled. The second step is to determine the correct size for the mm_struct slab object from the size of the mm_struct (excluding the cpu bitmap) and the size the cpumask. For init_mm we can simply allocate the maximum size this kernel is compiled for, since we only have one init_mm in the system, anyway. Signed-off-by: Rik van Riel Acked-by: Dave Hansen Tested-by: Song Liu --- include/linux/mm_types.h | 237 --- kernel/fork.c| 15 +-- mm/init-mm.c | 11 +++ 3 files changed, 140 insertions(+), 123 deletions(-) diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 99ce070e7dcb..e06de7e492d0 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -335,176 +335,179 @@ struct core_state { struct kioctx_table; struct mm_struct { - struct vm_area_struct *mmap;/* list of VMAs */ - struct rb_root mm_rb; - u32 vmacache_seqnum; /* per-thread vmacache */ + struct { + struct vm_area_struct *mmap;/* list of VMAs */ + struct rb_root mm_rb; + u32 vmacache_seqnum; /* per-thread vmacache */ #ifdef CONFIG_MMU - unsigned long (*get_unmapped_area) (struct file *filp, + unsigned long (*get_unmapped_area) (struct file *filp, unsigned long addr, unsigned long len, unsigned long pgoff, unsigned long flags); #endif - unsigned long mmap_base;/* base of mmap area */ - unsigned long mmap_legacy_base; /* base of mmap area in bottom-up allocations */ + unsigned long mmap_base;/* base of mmap area */ + unsigned long mmap_legacy_base; /* base of mmap area in bottom-up allocations */ #ifdef CONFIG_HAVE_ARCH_COMPAT_MMAP_BASES - /* Base adresses for compatible mmap() */ - unsigned long mmap_compat_base; - unsigned 
long mmap_compat_legacy_base; + /* Base adresses for compatible mmap() */ + unsigned long mmap_compat_base; + unsigned long mmap_compat_legacy_base; #endif - unsigned long task_size;/* size of task vm space */ - unsigned long highest_vm_end; /* highest vma end address */ - pgd_t * pgd; - - /** -* @mm_users: The number of users including userspace. -* -* Use mmget()/mmget_not_zero()/mmput() to modify. When this drops -* to 0 (i.e. when the task exits and there are no other temporary -* reference holders), we also release a reference on @mm_count -* (which may then free the mm_struct if @mm_count also -* drops to 0). -*/ - atomic_t mm_users; - - /** -* @mm_count: The number of references to mm_struct -* (@mm_users count as 1). -* -* Use mmgrab()/mmdrop() to modify. When this drops to 0, the -* mm_struct is freed. -*/ - atomic_t mm_count; + unsigned long task_size;/* size of task vm space */ + unsigned long highest_vm_end; /* highest vma end address */ + pgd_t * pgd; + + /** +* @mm_users: The number of users including userspace. +* +* Use mmget()/mmget_not_zero()/mmput() to modify. When this +* drops to 0 (i.e. when the task exits and there are no other +* temporary reference holders), we also release a reference on +* @mm_count (which may then free the mm_struct if +* @mm_count also drops to 0). +*/ + atomic_t mm_users; + + /** +* @mm_count: The number of references to mm_struct +* (@mm_users count as 1). +* +* Use mmgrab()/mmdrop() to modify. When this drops to 0, the +* mm_struct is freed. +*/ + atomic_t mm_count; #ifdef CONFIG_MMU - atomic_long_t pgtables_bytes; /* PTE page table pages */ + atomic_long_t pgtables_bytes; /* PTE page table pages */ #endif - int map_count; /* number of VMAs */ + int map_count; /* number of VMAs */ - spinlock_t page_table_lock; /* Protects page tables and some counters */ - struct rw_semaphore mmap_sem; + spinlock_t page_table_lock; /* Protects
[PATCH 1/7] mm: allocate mm_cpumask dynamically based on nr_cpu_ids
The mm_struct always contains a cpumask bitmap, regardless of CONFIG_CPUMASK_OFFSTACK. That means the first step can be to simplify things, and simply have one bitmask at the end of the mm_struct for the mm_cpumask. This does necessitate moving everything else in mm_struct into an anonymous sub-structure, which can be randomized when struct randomization is enabled. The second step is to determine the correct size for the mm_struct slab object from the size of the mm_struct (excluding the cpu bitmap) and the size the cpumask. For init_mm we can simply allocate the maximum size this kernel is compiled for, since we only have one init_mm in the system, anyway. Signed-off-by: Rik van Riel Acked-by: Dave Hansen Tested-by: Song Liu --- include/linux/mm_types.h | 237 --- kernel/fork.c| 15 +-- mm/init-mm.c | 11 +++ 3 files changed, 140 insertions(+), 123 deletions(-) diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 99ce070e7dcb..e06de7e492d0 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -335,176 +335,179 @@ struct core_state { struct kioctx_table; struct mm_struct { - struct vm_area_struct *mmap;/* list of VMAs */ - struct rb_root mm_rb; - u32 vmacache_seqnum; /* per-thread vmacache */ + struct { + struct vm_area_struct *mmap;/* list of VMAs */ + struct rb_root mm_rb; + u32 vmacache_seqnum; /* per-thread vmacache */ #ifdef CONFIG_MMU - unsigned long (*get_unmapped_area) (struct file *filp, + unsigned long (*get_unmapped_area) (struct file *filp, unsigned long addr, unsigned long len, unsigned long pgoff, unsigned long flags); #endif - unsigned long mmap_base;/* base of mmap area */ - unsigned long mmap_legacy_base; /* base of mmap area in bottom-up allocations */ + unsigned long mmap_base;/* base of mmap area */ + unsigned long mmap_legacy_base; /* base of mmap area in bottom-up allocations */ #ifdef CONFIG_HAVE_ARCH_COMPAT_MMAP_BASES - /* Base adresses for compatible mmap() */ - unsigned long mmap_compat_base; - unsigned 
long mmap_compat_legacy_base; + /* Base adresses for compatible mmap() */ + unsigned long mmap_compat_base; + unsigned long mmap_compat_legacy_base; #endif - unsigned long task_size;/* size of task vm space */ - unsigned long highest_vm_end; /* highest vma end address */ - pgd_t * pgd; - - /** -* @mm_users: The number of users including userspace. -* -* Use mmget()/mmget_not_zero()/mmput() to modify. When this drops -* to 0 (i.e. when the task exits and there are no other temporary -* reference holders), we also release a reference on @mm_count -* (which may then free the mm_struct if @mm_count also -* drops to 0). -*/ - atomic_t mm_users; - - /** -* @mm_count: The number of references to mm_struct -* (@mm_users count as 1). -* -* Use mmgrab()/mmdrop() to modify. When this drops to 0, the -* mm_struct is freed. -*/ - atomic_t mm_count; + unsigned long task_size;/* size of task vm space */ + unsigned long highest_vm_end; /* highest vma end address */ + pgd_t * pgd; + + /** +* @mm_users: The number of users including userspace. +* +* Use mmget()/mmget_not_zero()/mmput() to modify. When this +* drops to 0 (i.e. when the task exits and there are no other +* temporary reference holders), we also release a reference on +* @mm_count (which may then free the mm_struct if +* @mm_count also drops to 0). +*/ + atomic_t mm_users; + + /** +* @mm_count: The number of references to mm_struct +* (@mm_users count as 1). +* +* Use mmgrab()/mmdrop() to modify. When this drops to 0, the +* mm_struct is freed. +*/ + atomic_t mm_count; #ifdef CONFIG_MMU - atomic_long_t pgtables_bytes; /* PTE page table pages */ + atomic_long_t pgtables_bytes; /* PTE page table pages */ #endif - int map_count; /* number of VMAs */ + int map_count; /* number of VMAs */ - spinlock_t page_table_lock; /* Protects page tables and some counters */ - struct rw_semaphore mmap_sem; + spinlock_t page_table_lock; /* Protects
Re: [PATCH 1/7] mm: allocate mm_cpumask dynamically based on nr_cpu_ids
Hi Rik, Thank you for the patch! Yet something to improve: [auto build test ERROR on linus/master] [also build test ERROR on v4.18-rc2 next-20180629] [cannot apply to tip/x86/core] [if your patch is applied to the wrong git tree, please drop us a note to help improve the system] url: https://github.com/0day-ci/linux/commits/Rik-van-Riel/x86-tlb-mm-make-lazy-TLB-mode-even-lazier/20180629-232822 config: s390-debug_defconfig (attached as .config) compiler: s390x-linux-gnu-gcc (Debian 7.2.0-11) 7.2.0 reproduce: wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross chmod +x ~/bin/make.cross # save the attached .config to linux build tree GCC_VERSION=7.2.0 make.cross ARCH=s390 All errors (new ones prefixed by >>): In file included from arch/s390/include/asm/fpu/internal.h:12:0, from arch/s390/include/asm/processor.h:47, from arch/s390/include/asm/thread_info.h:25, from include/linux/thread_info.h:38, from arch/s390/include/asm/preempt.h:6, from include/linux/preempt.h:81, from include/linux/spinlock.h:51, from include/linux/seqlock.h:36, from include/linux/time.h:6, from include/linux/stat.h:19, from include/linux/module.h:10, from init/main.c:16: In function 'memset', inlined from 'start_kernel' at include/linux/bitmap.h:208:3: >> include/linux/string.h:327:3: error: call to '__write_overflow' declared >> with attribute error: detected write beyond size of object passed as 1st >> parameter __write_overflow(); ^~ vim +/__write_overflow +327 include/linux/string.h 6974f0c4 Daniel Micay 2017-07-12 322 6974f0c4 Daniel Micay 2017-07-12 323 __FORTIFY_INLINE void *memset(void *p, int c, __kernel_size_t size) 6974f0c4 Daniel Micay 2017-07-12 324 { 6974f0c4 Daniel Micay 2017-07-12 325 size_t p_size = __builtin_object_size(p, 0); 6974f0c4 Daniel Micay 2017-07-12 326 if (__builtin_constant_p(size) && p_size < size) 6974f0c4 Daniel Micay 2017-07-12 @327 __write_overflow(); 6974f0c4 Daniel Micay 2017-07-12 328 if (p_size < size) 6974f0c4 
Daniel Micay 2017-07-12 329 fortify_panic(__func__); 6974f0c4 Daniel Micay 2017-07-12 330 return __builtin_memset(p, c, size); 6974f0c4 Daniel Micay 2017-07-12 331 } 6974f0c4 Daniel Micay 2017-07-12 332 :: The code at line 327 was first introduced by commit :: 6974f0c4555e285ab217cee58b6e874f776ff409 include/linux/string.h: add the option of fortified string.h functions :: TO: Daniel Micay :: CC: Linus Torvalds --- 0-DAY kernel test infrastructureOpen Source Technology Center https://lists.01.org/pipermail/kbuild-all Intel Corporation .config.gz Description: application/gzip
Re: [PATCH 1/7] mm: allocate mm_cpumask dynamically based on nr_cpu_ids
Hi Rik, Thank you for the patch! Yet something to improve: [auto build test ERROR on linus/master] [also build test ERROR on v4.18-rc2 next-20180629] [cannot apply to tip/x86/core] [if your patch is applied to the wrong git tree, please drop us a note to help improve the system] url: https://github.com/0day-ci/linux/commits/Rik-van-Riel/x86-tlb-mm-make-lazy-TLB-mode-even-lazier/20180629-232822 config: s390-debug_defconfig (attached as .config) compiler: s390x-linux-gnu-gcc (Debian 7.2.0-11) 7.2.0 reproduce: wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross chmod +x ~/bin/make.cross # save the attached .config to linux build tree GCC_VERSION=7.2.0 make.cross ARCH=s390 All errors (new ones prefixed by >>): In file included from arch/s390/include/asm/fpu/internal.h:12:0, from arch/s390/include/asm/processor.h:47, from arch/s390/include/asm/thread_info.h:25, from include/linux/thread_info.h:38, from arch/s390/include/asm/preempt.h:6, from include/linux/preempt.h:81, from include/linux/spinlock.h:51, from include/linux/seqlock.h:36, from include/linux/time.h:6, from include/linux/stat.h:19, from include/linux/module.h:10, from init/main.c:16: In function 'memset', inlined from 'start_kernel' at include/linux/bitmap.h:208:3: >> include/linux/string.h:327:3: error: call to '__write_overflow' declared >> with attribute error: detected write beyond size of object passed as 1st >> parameter __write_overflow(); ^~ vim +/__write_overflow +327 include/linux/string.h 6974f0c4 Daniel Micay 2017-07-12 322 6974f0c4 Daniel Micay 2017-07-12 323 __FORTIFY_INLINE void *memset(void *p, int c, __kernel_size_t size) 6974f0c4 Daniel Micay 2017-07-12 324 { 6974f0c4 Daniel Micay 2017-07-12 325 size_t p_size = __builtin_object_size(p, 0); 6974f0c4 Daniel Micay 2017-07-12 326 if (__builtin_constant_p(size) && p_size < size) 6974f0c4 Daniel Micay 2017-07-12 @327 __write_overflow(); 6974f0c4 Daniel Micay 2017-07-12 328 if (p_size < size) 6974f0c4 
Daniel Micay 2017-07-12 329 fortify_panic(__func__); 6974f0c4 Daniel Micay 2017-07-12 330 return __builtin_memset(p, c, size); 6974f0c4 Daniel Micay 2017-07-12 331 } 6974f0c4 Daniel Micay 2017-07-12 332 :: The code at line 327 was first introduced by commit :: 6974f0c4555e285ab217cee58b6e874f776ff409 include/linux/string.h: add the option of fortified string.h functions :: TO: Daniel Micay :: CC: Linus Torvalds --- 0-DAY kernel test infrastructureOpen Source Technology Center https://lists.01.org/pipermail/kbuild-all Intel Corporation .config.gz Description: application/gzip
[PATCH 1/7] mm: allocate mm_cpumask dynamically based on nr_cpu_ids
The mm_struct always contains a cpumask bitmap, regardless of CONFIG_CPUMASK_OFFSTACK. That means the first step can be to simplify things, and simply have one bitmask at the end of the mm_struct for the mm_cpumask. This does necessitate moving everything else in mm_struct into an anonymous sub-structure, which can be randomized when struct randomization is enabled. The second step is to determine the correct size for the mm_struct slab object from the size of the mm_struct (excluding the cpu bitmap) and the size the cpumask. For init_mm we can simply allocate the maximum size this kernel is compiled for, since we only have one init_mm in the system, anyway. Signed-off-by: Rik van Riel Tested-by: Song Liu --- include/linux/mm_types.h | 237 --- kernel/fork.c| 15 +-- mm/init-mm.c | 11 +++ 3 files changed, 140 insertions(+), 123 deletions(-) diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 99ce070e7dcb..e06de7e492d0 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -335,176 +335,179 @@ struct core_state { struct kioctx_table; struct mm_struct { - struct vm_area_struct *mmap;/* list of VMAs */ - struct rb_root mm_rb; - u32 vmacache_seqnum; /* per-thread vmacache */ + struct { + struct vm_area_struct *mmap;/* list of VMAs */ + struct rb_root mm_rb; + u32 vmacache_seqnum; /* per-thread vmacache */ #ifdef CONFIG_MMU - unsigned long (*get_unmapped_area) (struct file *filp, + unsigned long (*get_unmapped_area) (struct file *filp, unsigned long addr, unsigned long len, unsigned long pgoff, unsigned long flags); #endif - unsigned long mmap_base;/* base of mmap area */ - unsigned long mmap_legacy_base; /* base of mmap area in bottom-up allocations */ + unsigned long mmap_base;/* base of mmap area */ + unsigned long mmap_legacy_base; /* base of mmap area in bottom-up allocations */ #ifdef CONFIG_HAVE_ARCH_COMPAT_MMAP_BASES - /* Base adresses for compatible mmap() */ - unsigned long mmap_compat_base; - unsigned long 
mmap_compat_legacy_base; + /* Base adresses for compatible mmap() */ + unsigned long mmap_compat_base; + unsigned long mmap_compat_legacy_base; #endif - unsigned long task_size;/* size of task vm space */ - unsigned long highest_vm_end; /* highest vma end address */ - pgd_t * pgd; - - /** -* @mm_users: The number of users including userspace. -* -* Use mmget()/mmget_not_zero()/mmput() to modify. When this drops -* to 0 (i.e. when the task exits and there are no other temporary -* reference holders), we also release a reference on @mm_count -* (which may then free the mm_struct if @mm_count also -* drops to 0). -*/ - atomic_t mm_users; - - /** -* @mm_count: The number of references to mm_struct -* (@mm_users count as 1). -* -* Use mmgrab()/mmdrop() to modify. When this drops to 0, the -* mm_struct is freed. -*/ - atomic_t mm_count; + unsigned long task_size;/* size of task vm space */ + unsigned long highest_vm_end; /* highest vma end address */ + pgd_t * pgd; + + /** +* @mm_users: The number of users including userspace. +* +* Use mmget()/mmget_not_zero()/mmput() to modify. When this +* drops to 0 (i.e. when the task exits and there are no other +* temporary reference holders), we also release a reference on +* @mm_count (which may then free the mm_struct if +* @mm_count also drops to 0). +*/ + atomic_t mm_users; + + /** +* @mm_count: The number of references to mm_struct +* (@mm_users count as 1). +* +* Use mmgrab()/mmdrop() to modify. When this drops to 0, the +* mm_struct is freed. +*/ + atomic_t mm_count; #ifdef CONFIG_MMU - atomic_long_t pgtables_bytes; /* PTE page table pages */ + atomic_long_t pgtables_bytes; /* PTE page table pages */ #endif - int map_count; /* number of VMAs */ + int map_count; /* number of VMAs */ - spinlock_t page_table_lock; /* Protects page tables and some counters */ - struct rw_semaphore mmap_sem; + spinlock_t page_table_lock; /* Protects page tables and some +
[PATCH 1/7] mm: allocate mm_cpumask dynamically based on nr_cpu_ids
The mm_struct always contains a cpumask bitmap, regardless of CONFIG_CPUMASK_OFFSTACK. That means the first step can be to simplify things, and simply have one bitmask at the end of the mm_struct for the mm_cpumask. This does necessitate moving everything else in mm_struct into an anonymous sub-structure, which can be randomized when struct randomization is enabled. The second step is to determine the correct size for the mm_struct slab object from the size of the mm_struct (excluding the cpu bitmap) and the size the cpumask. For init_mm we can simply allocate the maximum size this kernel is compiled for, since we only have one init_mm in the system, anyway. Signed-off-by: Rik van Riel Tested-by: Song Liu --- include/linux/mm_types.h | 237 --- kernel/fork.c| 15 +-- mm/init-mm.c | 11 +++ 3 files changed, 140 insertions(+), 123 deletions(-) diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 99ce070e7dcb..e06de7e492d0 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -335,176 +335,179 @@ struct core_state { struct kioctx_table; struct mm_struct { - struct vm_area_struct *mmap;/* list of VMAs */ - struct rb_root mm_rb; - u32 vmacache_seqnum; /* per-thread vmacache */ + struct { + struct vm_area_struct *mmap;/* list of VMAs */ + struct rb_root mm_rb; + u32 vmacache_seqnum; /* per-thread vmacache */ #ifdef CONFIG_MMU - unsigned long (*get_unmapped_area) (struct file *filp, + unsigned long (*get_unmapped_area) (struct file *filp, unsigned long addr, unsigned long len, unsigned long pgoff, unsigned long flags); #endif - unsigned long mmap_base;/* base of mmap area */ - unsigned long mmap_legacy_base; /* base of mmap area in bottom-up allocations */ + unsigned long mmap_base;/* base of mmap area */ + unsigned long mmap_legacy_base; /* base of mmap area in bottom-up allocations */ #ifdef CONFIG_HAVE_ARCH_COMPAT_MMAP_BASES - /* Base adresses for compatible mmap() */ - unsigned long mmap_compat_base; - unsigned long 
mmap_compat_legacy_base; + /* Base adresses for compatible mmap() */ + unsigned long mmap_compat_base; + unsigned long mmap_compat_legacy_base; #endif - unsigned long task_size;/* size of task vm space */ - unsigned long highest_vm_end; /* highest vma end address */ - pgd_t * pgd; - - /** -* @mm_users: The number of users including userspace. -* -* Use mmget()/mmget_not_zero()/mmput() to modify. When this drops -* to 0 (i.e. when the task exits and there are no other temporary -* reference holders), we also release a reference on @mm_count -* (which may then free the mm_struct if @mm_count also -* drops to 0). -*/ - atomic_t mm_users; - - /** -* @mm_count: The number of references to mm_struct -* (@mm_users count as 1). -* -* Use mmgrab()/mmdrop() to modify. When this drops to 0, the -* mm_struct is freed. -*/ - atomic_t mm_count; + unsigned long task_size;/* size of task vm space */ + unsigned long highest_vm_end; /* highest vma end address */ + pgd_t * pgd; + + /** +* @mm_users: The number of users including userspace. +* +* Use mmget()/mmget_not_zero()/mmput() to modify. When this +* drops to 0 (i.e. when the task exits and there are no other +* temporary reference holders), we also release a reference on +* @mm_count (which may then free the mm_struct if +* @mm_count also drops to 0). +*/ + atomic_t mm_users; + + /** +* @mm_count: The number of references to mm_struct +* (@mm_users count as 1). +* +* Use mmgrab()/mmdrop() to modify. When this drops to 0, the +* mm_struct is freed. +*/ + atomic_t mm_count; #ifdef CONFIG_MMU - atomic_long_t pgtables_bytes; /* PTE page table pages */ + atomic_long_t pgtables_bytes; /* PTE page table pages */ #endif - int map_count; /* number of VMAs */ + int map_count; /* number of VMAs */ - spinlock_t page_table_lock; /* Protects page tables and some counters */ - struct rw_semaphore mmap_sem; + spinlock_t page_table_lock; /* Protects page tables and some +
Re: [PATCH 1/7] mm: allocate mm_cpumask dynamically based on nr_cpu_ids
On Fri, 2018-06-22 at 08:10 -0700, Dave Hansen wrote: > On 06/20/2018 12:56 PM, Rik van Riel wrote: > > /* > > -* FIXME! The "sizeof(struct mm_struct)" currently > > includes the > > -* whole struct cpumask for the OFFSTACK case. We could > > change > > -* this to *only* allocate as much of it as required by > > the > > -* maximum number of CPU's we can ever have. The > > cpumask_allocation > > -* is at the end of the structure, exactly for that > > reason. > > +* The mm_cpumask is located at the end of mm_struct, and > > is > > +* dynamically sized based on nr_cpu_ids. > > */ > > + mm_size = sizeof(struct mm_struct) + cpumask_size(); > > + > > mm_cachep = kmem_cache_create_usercopy("mm_struct", > > - sizeof(struct mm_struct), > > ARCH_MIN_MMSTRUCT_ALIGN, > > + mm_size, ARCH_MIN_MMSTRUCT_ALIGN, > > Could you add a bit to that comment, like "dynamically sized based on > nr_cpu_ids" ... which is sized based on the number of possible CPUs. > > I found myself wondering how that interacts with hotplug. Improved in my tree for v2. Thank you. -- All Rights Reversed. signature.asc Description: This is a digitally signed message part
Re: [PATCH 1/7] mm: allocate mm_cpumask dynamically based on nr_cpu_ids
On Fri, 2018-06-22 at 08:10 -0700, Dave Hansen wrote: > On 06/20/2018 12:56 PM, Rik van Riel wrote: > > /* > > -* FIXME! The "sizeof(struct mm_struct)" currently > > includes the > > -* whole struct cpumask for the OFFSTACK case. We could > > change > > -* this to *only* allocate as much of it as required by > > the > > -* maximum number of CPU's we can ever have. The > > cpumask_allocation > > -* is at the end of the structure, exactly for that > > reason. > > +* The mm_cpumask is located at the end of mm_struct, and > > is > > +* dynamically sized based on nr_cpu_ids. > > */ > > + mm_size = sizeof(struct mm_struct) + cpumask_size(); > > + > > mm_cachep = kmem_cache_create_usercopy("mm_struct", > > - sizeof(struct mm_struct), > > ARCH_MIN_MMSTRUCT_ALIGN, > > + mm_size, ARCH_MIN_MMSTRUCT_ALIGN, > > Could you add a bit to that comment, like "dynamically sized based on > nr_cpu_ids" ... which is sized based on the number of possible CPUs. > > I found myself wondering how that interacts with hotplug. Improved in my tree for v2. Thank you. -- All Rights Reversed. signature.asc Description: This is a digitally signed message part
Re: [PATCH 1/7] mm: allocate mm_cpumask dynamically based on nr_cpu_ids
On 06/20/2018 12:56 PM, Rik van Riel wrote: > /* > - * FIXME! The "sizeof(struct mm_struct)" currently includes the > - * whole struct cpumask for the OFFSTACK case. We could change > - * this to *only* allocate as much of it as required by the > - * maximum number of CPU's we can ever have. The cpumask_allocation > - * is at the end of the structure, exactly for that reason. > + * The mm_cpumask is located at the end of mm_struct, and is > + * dynamically sized based on nr_cpu_ids. >*/ > + mm_size = sizeof(struct mm_struct) + cpumask_size(); > + > mm_cachep = kmem_cache_create_usercopy("mm_struct", > - sizeof(struct mm_struct), ARCH_MIN_MMSTRUCT_ALIGN, > + mm_size, ARCH_MIN_MMSTRUCT_ALIGN, Could you add a bit to that comment, like "dynamically sized based on nr_cpu_ids" ... which is sized based on the number of possible CPUs. I found myself wondering how that interacts with hotplug. t mm_struct, saved_auxv), > diff --git a/mm/init-mm.c b/mm/init-mm.c > index f94d5d15ebc0..20fe222fe4c0 100644 > --- a/mm/init-mm.c > +++ b/mm/init-mm.c > @@ -15,6 +15,15 @@ > #define INIT_MM_CONTEXT(name) > #endif > > +/* > + * For dynamically allocated mm_structs, there is a dynamically sized cpumask > + * at the end of the structure, the size of which depends on nr_cpu_ids. > That... Similar nit. Instead of calling out the variable alone, could we just say what it means logically and then reference the variable?
Re: [PATCH 1/7] mm: allocate mm_cpumask dynamically based on nr_cpu_ids
On 06/20/2018 12:56 PM, Rik van Riel wrote: > /* > - * FIXME! The "sizeof(struct mm_struct)" currently includes the > - * whole struct cpumask for the OFFSTACK case. We could change > - * this to *only* allocate as much of it as required by the > - * maximum number of CPU's we can ever have. The cpumask_allocation > - * is at the end of the structure, exactly for that reason. > + * The mm_cpumask is located at the end of mm_struct, and is > + * dynamically sized based on nr_cpu_ids. >*/ > + mm_size = sizeof(struct mm_struct) + cpumask_size(); > + > mm_cachep = kmem_cache_create_usercopy("mm_struct", > - sizeof(struct mm_struct), ARCH_MIN_MMSTRUCT_ALIGN, > + mm_size, ARCH_MIN_MMSTRUCT_ALIGN, Could you add a bit to that comment, like "dynamically sized based on nr_cpu_ids" ... which is sized based on the number of possible CPUs. I found myself wondering how that interacts with hotplug. t mm_struct, saved_auxv), > diff --git a/mm/init-mm.c b/mm/init-mm.c > index f94d5d15ebc0..20fe222fe4c0 100644 > --- a/mm/init-mm.c > +++ b/mm/init-mm.c > @@ -15,6 +15,15 @@ > #define INIT_MM_CONTEXT(name) > #endif > > +/* > + * For dynamically allocated mm_structs, there is a dynamically sized cpumask > + * at the end of the structure, the size of which depends on nr_cpu_ids. > That... Similar nit. Instead of calling out the variable alone, could we just say what it means logically and then reference the variable?
Re: [PATCH 1/7] mm: allocate mm_cpumask dynamically based on nr_cpu_ids
On Thu, 2018-06-21 at 05:32 +0800, kbuild test robot wrote: > Hi Rik, > > Thank you for the patch! Yet something to improve: > > [auto build test ERROR on v4.17] > [also build test ERROR on next-20180620] > [cannot apply to tip/x86/core linus/master mmotm/master v4.18-rc1] > [if your patch is applied to the wrong git tree, please drop us a > note to help improve the system] > > url:https://github.com/0day-ci/linux/commits/Rik-van-Riel/x86-tlb > -mm-make-lazy-TLB-mode-even-lazier/20180621-045620 > config: x86_64-randconfig-x016-201824 (attached as .config) > compiler: gcc-7 (Debian 7.3.0-16) 7.3.0 > reproduce: > # save the attached .config to linux build tree > make ARCH=x86_64 > > Note: the linux-review/Rik-van-Riel/x86-tlb-mm-make-lazy-TLB-mode- > even-lazier/20180621-045620 HEAD > 7f2e7d915758c367dd0515efc17af977592fa141 builds fine. > It only hurts bisectibility. > > All errors (new ones prefixed by >>): > > > > mm/init-mm.c:38:1: error: Only string constants are supported as > > > initializers for randomized structures with flexible arrays > > }; Fixed in my tree for v2, by moving all of the randomizable bits of mm_struct into an anonymous sub-structure, and making sure the bitmap is always at the end. -- All Rights Reversed. signature.asc Description: This is a digitally signed message part
Re: [PATCH 1/7] mm: allocate mm_cpumask dynamically based on nr_cpu_ids
On Thu, 2018-06-21 at 05:32 +0800, kbuild test robot wrote: > Hi Rik, > > Thank you for the patch! Yet something to improve: > > [auto build test ERROR on v4.17] > [also build test ERROR on next-20180620] > [cannot apply to tip/x86/core linus/master mmotm/master v4.18-rc1] > [if your patch is applied to the wrong git tree, please drop us a > note to help improve the system] > > url:https://github.com/0day-ci/linux/commits/Rik-van-Riel/x86-tlb > -mm-make-lazy-TLB-mode-even-lazier/20180621-045620 > config: x86_64-randconfig-x016-201824 (attached as .config) > compiler: gcc-7 (Debian 7.3.0-16) 7.3.0 > reproduce: > # save the attached .config to linux build tree > make ARCH=x86_64 > > Note: the linux-review/Rik-van-Riel/x86-tlb-mm-make-lazy-TLB-mode- > even-lazier/20180621-045620 HEAD > 7f2e7d915758c367dd0515efc17af977592fa141 builds fine. > It only hurts bisectibility. > > All errors (new ones prefixed by >>): > > > > mm/init-mm.c:38:1: error: Only string constants are supported as > > > initializers for randomized structures with flexible arrays > > }; Fixed in my tree for v2, by moving all of the randomizable bits of mm_struct into an anonymous sub-structure, and making sure the bitmap is always at the end. -- All Rights Reversed. signature.asc Description: This is a digitally signed message part
Re: [PATCH 1/7] mm: allocate mm_cpumask dynamically based on nr_cpu_ids
Hi Rik, Thank you for the patch! Perhaps something to improve: [auto build test WARNING on v4.17] [also build test WARNING on next-20180620] [cannot apply to tip/x86/core linus/master mmotm/master v4.18-rc1] [if your patch is applied to the wrong git tree, please drop us a note to help improve the system] url: https://github.com/0day-ci/linux/commits/Rik-van-Riel/x86-tlb-mm-make-lazy-TLB-mode-even-lazier/20180621-045620 config: x86_64-allyesdebian (attached as .config) compiler: gcc-7 (Debian 7.3.0-16) 7.3.0 reproduce: # save the attached .config to linux build tree make ARCH=x86_64 All warnings (new ones prefixed by >>): In file included from include/linux/cpumask.h:12:0, from arch/x86/include/asm/cpumask.h:5, from arch/x86/include/asm/msr.h:11, from arch/x86/include/asm/processor.h:21, from arch/x86/include/asm/cpufeature.h:5, from arch/x86/include/asm/thread_info.h:53, from include/linux/thread_info.h:38, from arch/x86/include/asm/preempt.h:7, from include/linux/preempt.h:81, from include/linux/spinlock.h:51, from include/linux/mmzone.h:8, from include/linux/gfp.h:6, from include/linux/mm.h:10, from arch/x86/platform/efi/efi_64.c:23: In function 'bitmap_zero.constprop', inlined from 'cpumask_clear.constprop' at include/linux/cpumask.h:378:2, inlined from 'efi_alloc_page_tables' at include/linux/mm_types.h:512:2: >> include/linux/bitmap.h:208:3: warning: 'memset' writing 64 bytes into a >> region of size 0 overflows the destination [-Wstringop-overflow=] memset(dst, 0, len); ^~~ vim +/memset +208 include/linux/bitmap.h ^1da177e Linus Torvalds 2005-04-16 198 4b0bc0bc Rusty Russell2008-12-30 199 #define small_const_nbits(nbits) \ 4b0bc0bc Rusty Russell2008-12-30 200 (__builtin_constant_p(nbits) && (nbits) <= BITS_PER_LONG) 4b0bc0bc Rusty Russell2008-12-30 201 8b4daad5 Rasmus Villemoes 2015-02-12 202 static inline void bitmap_zero(unsigned long *dst, unsigned int nbits) ^1da177e Linus Torvalds 2005-04-16 203 { 4b0bc0bc Rusty Russell2008-12-30 204 if 
(small_const_nbits(nbits)) ^1da177e Linus Torvalds 2005-04-16 205 *dst = 0UL; ^1da177e Linus Torvalds 2005-04-16 206 else { 8b4daad5 Rasmus Villemoes 2015-02-12 207 unsigned int len = BITS_TO_LONGS(nbits) * sizeof(unsigned long); ^1da177e Linus Torvalds 2005-04-16 @208 memset(dst, 0, len); ^1da177e Linus Torvalds 2005-04-16 209 } ^1da177e Linus Torvalds 2005-04-16 210 } ^1da177e Linus Torvalds 2005-04-16 211 :: The code at line 208 was first introduced by commit :: 1da177e4c3f41524e886b7f1b8a0c1fc7321cac2 Linux-2.6.12-rc2 :: TO: Linus Torvalds :: CC: Linus Torvalds --- 0-DAY kernel test infrastructureOpen Source Technology Center https://lists.01.org/pipermail/kbuild-all Intel Corporation .config.gz Description: application/gzip
Re: [PATCH 1/7] mm: allocate mm_cpumask dynamically based on nr_cpu_ids
Hi Rik, Thank you for the patch! Perhaps something to improve: [auto build test WARNING on v4.17] [also build test WARNING on next-20180620] [cannot apply to tip/x86/core linus/master mmotm/master v4.18-rc1] [if your patch is applied to the wrong git tree, please drop us a note to help improve the system] url: https://github.com/0day-ci/linux/commits/Rik-van-Riel/x86-tlb-mm-make-lazy-TLB-mode-even-lazier/20180621-045620 config: x86_64-allyesdebian (attached as .config) compiler: gcc-7 (Debian 7.3.0-16) 7.3.0 reproduce: # save the attached .config to linux build tree make ARCH=x86_64 All warnings (new ones prefixed by >>): In file included from include/linux/cpumask.h:12:0, from arch/x86/include/asm/cpumask.h:5, from arch/x86/include/asm/msr.h:11, from arch/x86/include/asm/processor.h:21, from arch/x86/include/asm/cpufeature.h:5, from arch/x86/include/asm/thread_info.h:53, from include/linux/thread_info.h:38, from arch/x86/include/asm/preempt.h:7, from include/linux/preempt.h:81, from include/linux/spinlock.h:51, from include/linux/mmzone.h:8, from include/linux/gfp.h:6, from include/linux/mm.h:10, from arch/x86/platform/efi/efi_64.c:23: In function 'bitmap_zero.constprop', inlined from 'cpumask_clear.constprop' at include/linux/cpumask.h:378:2, inlined from 'efi_alloc_page_tables' at include/linux/mm_types.h:512:2: >> include/linux/bitmap.h:208:3: warning: 'memset' writing 64 bytes into a >> region of size 0 overflows the destination [-Wstringop-overflow=] memset(dst, 0, len); ^~~ vim +/memset +208 include/linux/bitmap.h ^1da177e Linus Torvalds 2005-04-16 198 4b0bc0bc Rusty Russell2008-12-30 199 #define small_const_nbits(nbits) \ 4b0bc0bc Rusty Russell2008-12-30 200 (__builtin_constant_p(nbits) && (nbits) <= BITS_PER_LONG) 4b0bc0bc Rusty Russell2008-12-30 201 8b4daad5 Rasmus Villemoes 2015-02-12 202 static inline void bitmap_zero(unsigned long *dst, unsigned int nbits) ^1da177e Linus Torvalds 2005-04-16 203 { 4b0bc0bc Rusty Russell2008-12-30 204 if 
(small_const_nbits(nbits)) ^1da177e Linus Torvalds 2005-04-16 205 *dst = 0UL; ^1da177e Linus Torvalds 2005-04-16 206 else { 8b4daad5 Rasmus Villemoes 2015-02-12 207 unsigned int len = BITS_TO_LONGS(nbits) * sizeof(unsigned long); ^1da177e Linus Torvalds 2005-04-16 @208 memset(dst, 0, len); ^1da177e Linus Torvalds 2005-04-16 209 } ^1da177e Linus Torvalds 2005-04-16 210 } ^1da177e Linus Torvalds 2005-04-16 211 :: The code at line 208 was first introduced by commit :: 1da177e4c3f41524e886b7f1b8a0c1fc7321cac2 Linux-2.6.12-rc2 :: TO: Linus Torvalds :: CC: Linus Torvalds --- 0-DAY kernel test infrastructureOpen Source Technology Center https://lists.01.org/pipermail/kbuild-all Intel Corporation .config.gz Description: application/gzip
Re: [PATCH 1/7] mm: allocate mm_cpumask dynamically based on nr_cpu_ids
Hi Rik, Thank you for the patch! Yet something to improve: [auto build test ERROR on v4.17] [also build test ERROR on next-20180620] [cannot apply to tip/x86/core linus/master mmotm/master v4.18-rc1] [if your patch is applied to the wrong git tree, please drop us a note to help improve the system] url: https://github.com/0day-ci/linux/commits/Rik-van-Riel/x86-tlb-mm-make-lazy-TLB-mode-even-lazier/20180621-045620 config: x86_64-randconfig-x016-201824 (attached as .config) compiler: gcc-7 (Debian 7.3.0-16) 7.3.0 reproduce: # save the attached .config to linux build tree make ARCH=x86_64 Note: the linux-review/Rik-van-Riel/x86-tlb-mm-make-lazy-TLB-mode-even-lazier/20180621-045620 HEAD 7f2e7d915758c367dd0515efc17af977592fa141 builds fine. It only hurts bisectibility. All errors (new ones prefixed by >>): >> mm/init-mm.c:38:1: error: Only string constants are supported as >> initializers for randomized structures with flexible arrays }; ^ vim +38 mm/init-mm.c bb1f17b0 Alexey Dobriyan 2009-06-16 17 c59b389d Rik van Riel 2018-06-20 18 /* c59b389d Rik van Riel 2018-06-20 19 * For dynamically allocated mm_structs, there is a dynamically sized cpumask c59b389d Rik van Riel 2018-06-20 20 * at the end of the structure, the size of which depends on nr_cpu_ids. That c59b389d Rik van Riel 2018-06-20 21 * way we allocate only as much memory for mm_cpumask() as needed for the c59b389d Rik van Riel 2018-06-20 22 * hundreds, or thousands of processes that a system typically runs. c59b389d Rik van Riel 2018-06-20 23 * c59b389d Rik van Riel 2018-06-20 24 * Since there is only one init_mm in the entire system, keep it simple c59b389d Rik van Riel 2018-06-20 25 * and size this cpu_bitmask to NR_CPUS. 
c59b389d Rik van Riel 2018-06-20 26 */ bb1f17b0 Alexey Dobriyan 2009-06-16 27 struct mm_struct init_mm = { bb1f17b0 Alexey Dobriyan 2009-06-16 28 .mm_rb = RB_ROOT, bb1f17b0 Alexey Dobriyan 2009-06-16 29 .pgd= swapper_pg_dir, bb1f17b0 Alexey Dobriyan 2009-06-16 30 .mm_users = ATOMIC_INIT(2), bb1f17b0 Alexey Dobriyan 2009-06-16 31 .mm_count = ATOMIC_INIT(1), bb1f17b0 Alexey Dobriyan 2009-06-16 32 .mmap_sem = __RWSEM_INITIALIZER(init_mm.mmap_sem), bb1f17b0 Alexey Dobriyan 2009-06-16 33 .page_table_lock = __SPIN_LOCK_UNLOCKED(init_mm.page_table_lock), bb1f17b0 Alexey Dobriyan 2009-06-16 34 .mmlist = LIST_HEAD_INIT(init_mm.mmlist), bfedb589 Eric W. Biederman 2016-10-13 35 .user_ns= &init_user_ns, c59b389d Rik van Riel 2018-06-20 36 .cpu_bitmap = { [BITS_TO_LONGS(NR_CPUS)] = 0}, a1b200e2 Heiko Carstens2010-08-09 37 INIT_MM_CONTEXT(init_mm) bb1f17b0 Alexey Dobriyan 2009-06-16 @38 }; :: The code at line 38 was first introduced by commit :: bb1f17b0372de93758653ca3454bc0df18dc2e5c mm: consolidate init_mm definition :: TO: Alexey Dobriyan :: CC: Linus Torvalds --- 0-DAY kernel test infrastructureOpen Source Technology Center https://lists.01.org/pipermail/kbuild-all Intel Corporation .config.gz Description: application/gzip
Re: [PATCH 1/7] mm: allocate mm_cpumask dynamically based on nr_cpu_ids
Hi Rik, Thank you for the patch! Yet something to improve: [auto build test ERROR on v4.17] [also build test ERROR on next-20180620] [cannot apply to tip/x86/core linus/master mmotm/master v4.18-rc1] [if your patch is applied to the wrong git tree, please drop us a note to help improve the system] url: https://github.com/0day-ci/linux/commits/Rik-van-Riel/x86-tlb-mm-make-lazy-TLB-mode-even-lazier/20180621-045620 config: x86_64-randconfig-x016-201824 (attached as .config) compiler: gcc-7 (Debian 7.3.0-16) 7.3.0 reproduce: # save the attached .config to linux build tree make ARCH=x86_64 Note: the linux-review/Rik-van-Riel/x86-tlb-mm-make-lazy-TLB-mode-even-lazier/20180621-045620 HEAD 7f2e7d915758c367dd0515efc17af977592fa141 builds fine. It only hurts bisectibility. All errors (new ones prefixed by >>): >> mm/init-mm.c:38:1: error: Only string constants are supported as >> initializers for randomized structures with flexible arrays }; ^ vim +38 mm/init-mm.c bb1f17b0 Alexey Dobriyan 2009-06-16 17 c59b389d Rik van Riel 2018-06-20 18 /* c59b389d Rik van Riel 2018-06-20 19 * For dynamically allocated mm_structs, there is a dynamically sized cpumask c59b389d Rik van Riel 2018-06-20 20 * at the end of the structure, the size of which depends on nr_cpu_ids. That c59b389d Rik van Riel 2018-06-20 21 * way we allocate only as much memory for mm_cpumask() as needed for the c59b389d Rik van Riel 2018-06-20 22 * hundreds, or thousands of processes that a system typically runs. c59b389d Rik van Riel 2018-06-20 23 * c59b389d Rik van Riel 2018-06-20 24 * Since there is only one init_mm in the entire system, keep it simple c59b389d Rik van Riel 2018-06-20 25 * and size this cpu_bitmask to NR_CPUS. 
c59b389d Rik van Riel 2018-06-20 26 */ bb1f17b0 Alexey Dobriyan 2009-06-16 27 struct mm_struct init_mm = { bb1f17b0 Alexey Dobriyan 2009-06-16 28 .mm_rb = RB_ROOT, bb1f17b0 Alexey Dobriyan 2009-06-16 29 .pgd= swapper_pg_dir, bb1f17b0 Alexey Dobriyan 2009-06-16 30 .mm_users = ATOMIC_INIT(2), bb1f17b0 Alexey Dobriyan 2009-06-16 31 .mm_count = ATOMIC_INIT(1), bb1f17b0 Alexey Dobriyan 2009-06-16 32 .mmap_sem = __RWSEM_INITIALIZER(init_mm.mmap_sem), bb1f17b0 Alexey Dobriyan 2009-06-16 33 .page_table_lock = __SPIN_LOCK_UNLOCKED(init_mm.page_table_lock), bb1f17b0 Alexey Dobriyan 2009-06-16 34 .mmlist = LIST_HEAD_INIT(init_mm.mmlist), bfedb589 Eric W. Biederman 2016-10-13 35 .user_ns= &init_user_ns, c59b389d Rik van Riel 2018-06-20 36 .cpu_bitmap = { [BITS_TO_LONGS(NR_CPUS)] = 0}, a1b200e2 Heiko Carstens2010-08-09 37 INIT_MM_CONTEXT(init_mm) bb1f17b0 Alexey Dobriyan 2009-06-16 @38 }; :: The code at line 38 was first introduced by commit :: bb1f17b0372de93758653ca3454bc0df18dc2e5c mm: consolidate init_mm definition :: TO: Alexey Dobriyan :: CC: Linus Torvalds --- 0-DAY kernel test infrastructureOpen Source Technology Center https://lists.01.org/pipermail/kbuild-all Intel Corporation .config.gz Description: application/gzip
[PATCH 1/7] mm: allocate mm_cpumask dynamically based on nr_cpu_ids
The mm_struct always contains a cpumask bitmap, regardless of CONFIG_CPUMASK_OFFSTACK. That means the first step can be to simplify things, and simply have one bitmask at the end of the mm_struct for the mm_cpumask. The second step is to determine the correct size for the mm_struct slab object from the size of the mm_struct (excluding the cpu bitmap) and the size the cpumask. For init_mm we can simply allocate the maximum size this kernel is compiled for, since we only have one init_mm in the system, anyway. Signed-off-by: Rik van Riel Tested-by: Song Liu --- include/linux/mm_types.h | 18 -- kernel/fork.c| 14 -- mm/init-mm.c | 10 ++ 3 files changed, 26 insertions(+), 16 deletions(-) diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 21612347d311..8e91632958f3 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -427,8 +427,6 @@ struct mm_struct { struct linux_binfmt *binfmt; - cpumask_var_t cpu_vm_mask_var; - /* Architecture-specific MM context */ mm_context_t context; @@ -465,9 +463,6 @@ struct mm_struct { #if defined(CONFIG_TRANSPARENT_HUGEPAGE) && !USE_SPLIT_PMD_PTLOCKS pgtable_t pmd_huge_pte; /* protected by page_table_lock */ #endif -#ifdef CONFIG_CPUMASK_OFFSTACK - struct cpumask cpumask_allocation; -#endif #ifdef CONFIG_NUMA_BALANCING /* * numa_next_scan is the next time that the PTEs will be marked @@ -502,22 +497,25 @@ struct mm_struct { /* HMM needs to track a few things per mm */ struct hmm *hmm; #endif + + /* +* The mm_cpumask needs to be at the end of mm_struct, because it +* is dynamically sized based on nr_cpu_ids. +*/ + unsigned long cpu_bitmap[]; } __randomize_layout; extern struct mm_struct init_mm; static inline void mm_init_cpumask(struct mm_struct *mm) { -#ifdef CONFIG_CPUMASK_OFFSTACK - mm->cpu_vm_mask_var = &mm->cpumask_allocation; -#endif - cpumask_clear(mm->cpu_vm_mask_var); + cpumask_clear((struct cpumask *)&mm->cpu_bitmap); } /* Future-safe accessor for struct mm_struct's cpu_vm_mask.
*/ static inline cpumask_t *mm_cpumask(struct mm_struct *mm) { - return mm->cpu_vm_mask_var; + return (struct cpumask *)&mm->cpu_bitmap; } struct mmu_gather; diff --git a/kernel/fork.c b/kernel/fork.c index a5d21c42acfc..c6a20bc78102 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -2242,6 +2242,8 @@ static void sighand_ctor(void *data) void __init proc_caches_init(void) { + unsigned int mm_size; + sighand_cachep = kmem_cache_create("sighand_cache", sizeof(struct sighand_struct), 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_TYPESAFE_BY_RCU| @@ -2258,15 +2260,15 @@ void __init proc_caches_init(void) sizeof(struct fs_struct), 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_ACCOUNT, NULL); + /* -* FIXME! The "sizeof(struct mm_struct)" currently includes the -* whole struct cpumask for the OFFSTACK case. We could change -* this to *only* allocate as much of it as required by the -* maximum number of CPU's we can ever have. The cpumask_allocation -* is at the end of the structure, exactly for that reason. +* The mm_cpumask is located at the end of mm_struct, and is +* dynamically sized based on nr_cpu_ids. */ + mm_size = sizeof(struct mm_struct) + cpumask_size(); + mm_cachep = kmem_cache_create_usercopy("mm_struct", - sizeof(struct mm_struct), ARCH_MIN_MMSTRUCT_ALIGN, + mm_size, ARCH_MIN_MMSTRUCT_ALIGN, SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_ACCOUNT, offsetof(struct mm_struct, saved_auxv), sizeof_field(struct mm_struct, saved_auxv), diff --git a/mm/init-mm.c b/mm/init-mm.c index f94d5d15ebc0..20fe222fe4c0 100644 --- a/mm/init-mm.c +++ b/mm/init-mm.c @@ -15,6 +15,15 @@ #define INIT_MM_CONTEXT(name) #endif +/* + * For dynamically allocated mm_structs, there is a dynamically sized cpumask + * at the end of the structure, the size of which depends on nr_cpu_ids. That + * way we allocate only as much memory for mm_cpumask() as needed for the + * hundreds, or thousands of processes that a system typically runs.
+ * + * Since there is only one init_mm in the entire system, keep it simple + * and size this cpu_bitmask to NR_CPUS. + */ struct mm_struct init_mm = { .mm_rb = RB_ROOT, .pgd= swapper_pg_dir, @@ -24,5 +33,6 @@ struct mm_struct init_mm = { .page_table_lock = __SPIN_LOCK_UNLOCKED(init_mm.page_table_lock), .mmlist = LIST_HEAD_INIT(init_mm.mmlist), .user_ns= &init_user_ns, + .cpu_bitmap = { [BITS_TO_LONGS(NR_CPUS)] = 0},
[PATCH 1/7] mm: allocate mm_cpumask dynamically based on nr_cpu_ids
The mm_struct always contains a cpumask bitmap, regardless of CONFIG_CPUMASK_OFFSTACK. That means the first step can be to simplify things, and simply have one bitmask at the end of the mm_struct for the mm_cpumask. The second step is to determine the correct size for the mm_struct slab object from the size of the mm_struct (excluding the cpu bitmap) and the size the cpumask. For init_mm we can simply allocate the maximum size this kernel is compiled for, since we only have one init_mm in the system, anyway. Signed-off-by: Rik van Riel Tested-by: Song Liu --- include/linux/mm_types.h | 18 -- kernel/fork.c| 14 -- mm/init-mm.c | 10 ++ 3 files changed, 26 insertions(+), 16 deletions(-) diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 21612347d311..8e91632958f3 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -427,8 +427,6 @@ struct mm_struct { struct linux_binfmt *binfmt; - cpumask_var_t cpu_vm_mask_var; - /* Architecture-specific MM context */ mm_context_t context; @@ -465,9 +463,6 @@ struct mm_struct { #if defined(CONFIG_TRANSPARENT_HUGEPAGE) && !USE_SPLIT_PMD_PTLOCKS pgtable_t pmd_huge_pte; /* protected by page_table_lock */ #endif -#ifdef CONFIG_CPUMASK_OFFSTACK - struct cpumask cpumask_allocation; -#endif #ifdef CONFIG_NUMA_BALANCING /* * numa_next_scan is the next time that the PTEs will be marked @@ -502,22 +497,25 @@ struct mm_struct { /* HMM needs to track a few things per mm */ struct hmm *hmm; #endif + + /* +* The mm_cpumask needs to be at the end of mm_struct, because it +* is dynamically sized based on nr_cpu_ids. +*/ + unsigned long cpu_bitmap[]; } __randomize_layout; extern struct mm_struct init_mm; static inline void mm_init_cpumask(struct mm_struct *mm) { -#ifdef CONFIG_CPUMASK_OFFSTACK - mm->cpu_vm_mask_var = &mm->cpumask_allocation; -#endif - cpumask_clear(mm->cpu_vm_mask_var); + cpumask_clear((struct cpumask *)&mm->cpu_bitmap); } /* Future-safe accessor for struct mm_struct's cpu_vm_mask.
*/ static inline cpumask_t *mm_cpumask(struct mm_struct *mm) { - return mm->cpu_vm_mask_var; + return (struct cpumask *)&mm->cpu_bitmap; } struct mmu_gather; diff --git a/kernel/fork.c b/kernel/fork.c index a5d21c42acfc..c6a20bc78102 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -2242,6 +2242,8 @@ static void sighand_ctor(void *data) void __init proc_caches_init(void) { + unsigned int mm_size; + sighand_cachep = kmem_cache_create("sighand_cache", sizeof(struct sighand_struct), 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_TYPESAFE_BY_RCU| @@ -2258,15 +2260,15 @@ void __init proc_caches_init(void) sizeof(struct fs_struct), 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_ACCOUNT, NULL); + /* -* FIXME! The "sizeof(struct mm_struct)" currently includes the -* whole struct cpumask for the OFFSTACK case. We could change -* this to *only* allocate as much of it as required by the -* maximum number of CPU's we can ever have. The cpumask_allocation -* is at the end of the structure, exactly for that reason. +* The mm_cpumask is located at the end of mm_struct, and is +* dynamically sized based on nr_cpu_ids. */ + mm_size = sizeof(struct mm_struct) + cpumask_size(); + mm_cachep = kmem_cache_create_usercopy("mm_struct", - sizeof(struct mm_struct), ARCH_MIN_MMSTRUCT_ALIGN, + mm_size, ARCH_MIN_MMSTRUCT_ALIGN, SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_ACCOUNT, offsetof(struct mm_struct, saved_auxv), sizeof_field(struct mm_struct, saved_auxv), diff --git a/mm/init-mm.c b/mm/init-mm.c index f94d5d15ebc0..20fe222fe4c0 100644 --- a/mm/init-mm.c +++ b/mm/init-mm.c @@ -15,6 +15,15 @@ #define INIT_MM_CONTEXT(name) #endif +/* + * For dynamically allocated mm_structs, there is a dynamically sized cpumask + * at the end of the structure, the size of which depends on nr_cpu_ids. That + * way we allocate only as much memory for mm_cpumask() as needed for the + * hundreds, or thousands of processes that a system typically runs.
+ * + * Since there is only one init_mm in the entire system, keep it simple + * and size this cpu_bitmask to NR_CPUS. + */ struct mm_struct init_mm = { .mm_rb = RB_ROOT, .pgd= swapper_pg_dir, @@ -24,5 +33,6 @@ struct mm_struct init_mm = { .page_table_lock = __SPIN_LOCK_UNLOCKED(init_mm.page_table_lock), .mmlist = LIST_HEAD_INIT(init_mm.mmlist), .user_ns= &init_user_ns, + .cpu_bitmap = { [BITS_TO_LONGS(NR_CPUS)] = 0},