On 10/20/20 at 01:45pm, Rahul Gopakumar wrote:
> Hi Baoquan,
>
> We had some trouble applying the patch to the problem commit and to the
> latest upstream commit. Steven (CC'ed) helped us by providing an updated
> draft patch. We applied it on top of the latest commit
> (3e4fb4346c781068610d03c12b16c0cfb0fd24a3), but it does not appear to
> improve the performance numbers.
Thanks for your feedback. From the code, I am sure what the problem is,
but I haven't tested it on a system with huge memory. I forgot to mention
that my draft patch is based on the akpm/master branch, since this is an
mm issue; it may differ a little from Linus's mainline kernel. Sorry for
the inconvenience.
I will test and debug this on a server with 4 TB of memory in our lab and
post an update if I make any progress.
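
To recap the idea behind the draft patch: memmap_init_zone() asks
defer_init() for each pfn whether the rest of the struct page init can be
deferred until after boot. Below is a simplified, paraphrased sketch of
that logic in mm/page_alloc.c (trimmed for illustration; not the exact
upstream code):

/*
 * Sketch of defer_init(): returns true once enough pages of the current
 * (nid, end_pfn) span have been initialised eagerly, so the caller can
 * stop and leave the rest to the deferred-init kthreads.
 */
static bool defer_init(int nid, unsigned long pfn, unsigned long end_pfn)
{
        static unsigned long prev_end_pfn, nr_initialised;

        /* A new end_pfn restarts the accounting. */
        if (prev_end_pfn != end_pfn) {
                prev_end_pfn = end_pfn;
                nr_initialised = 0;
        }

        /* Always init low zones for address-constrained allocations. */
        if (end_pfn < pgdat_end_pfn(NODE_DATA(nid)))
                return false;

        /* Deferral has already started for this node. */
        if (NODE_DATA(nid)->first_deferred_pfn != ULONG_MAX)
                return true;

        /* Init one section's worth of pages now and defer the rest. */
        nr_initialised++;
        if ((nr_initialised > PAGES_PER_SECTION) &&
            (pfn & (PAGES_PER_SECTION - 1)) == 0) {
                NODE_DATA(nid)->first_deferred_pfn = pfn;
                return true;
        }
        return false;
}

When end_pfn is the end of a single memblock range rather than the zone
end, every range that ends below the node end fails the pgdat_end_pfn()
check and is initialised in full, and the static accounting is reset for
each new range. Passing zone_end_pfn instead should restore the intended
per-zone deferral.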
>
> Patch on latest commit - 20.161 secs
> Vanilla latest commit - 19.50 secs
Here, do you mean it actually took more time with the patch applied?
>
> Here is the draft patch we tried
>
> ------------------------
>
> diff --git a/arch/ia64/mm/init.c b/arch/ia64/mm/init.c
> index 8e7b8c6c576e..ff5fa4c3889e 100644
> --- a/arch/ia64/mm/init.c
> +++ b/arch/ia64/mm/init.c
> @@ -537,7 +537,7 @@ virtual_memmap_init(u64 start, u64 end, void *arg)
>
>         if (map_start < map_end)
>                 memmap_init_zone((unsigned long)(map_end - map_start),
> -                                args->nid, args->zone, page_to_pfn(map_start),
> +                                args->nid, args->zone, page_to_pfn(map_start), page_to_pfn(map_end),
>                                  MEMINIT_EARLY, NULL);
>         return 0;
>  }
> @@ -547,7 +547,7 @@ memmap_init (unsigned long size, int nid, unsigned long zone,
>              unsigned long start_pfn)
>  {
>         if (!vmem_map) {
> -               memmap_init_zone(size, nid, zone, start_pfn,
> +               memmap_init_zone(size, nid, zone, start_pfn, start_pfn + size,
>                                  MEMINIT_EARLY, NULL);
>         } else {
>                 struct page *start;
> diff --git a/include/linux/mm.h b/include/linux/mm.h
> index 16b799a0522c..65e34b370e33 100644
> --- a/include/linux/mm.h
> +++ b/include/linux/mm.h
> @@ -2416,7 +2416,7 @@ extern int __meminit __early_pfn_to_nid(unsigned long pfn,
>
>  extern void set_dma_reserve(unsigned long new_dma_reserve);
>  extern void memmap_init_zone(unsigned long, int, unsigned long, unsigned long,
> -               enum meminit_context, struct vmem_altmap *);
> +               unsigned long, enum meminit_context, struct vmem_altmap *);
>  extern void setup_per_zone_wmarks(void);
>  extern int __meminit init_per_zone_wmark_min(void);
>  extern void mem_init(void);
> diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
> index ce3e73e3a5c1..03fddd8f4b11 100644
> --- a/mm/memory_hotplug.c
> +++ b/mm/memory_hotplug.c
> @@ -728,7 +728,7 @@ void __ref move_pfn_range_to_zone(struct zone *zone, unsigned long start_pfn,
>          * expects the zone spans the pfn range. All the pages in the range
>          * are reserved so nobody should be touching them so we should be safe
>          */
> -       memmap_init_zone(nr_pages, nid, zone_idx(zone), start_pfn,
> +       memmap_init_zone(nr_pages, nid, zone_idx(zone), start_pfn, 0,
>                          MEMINIT_HOTPLUG, altmap);
>
>         set_zone_contiguous(zone);
> diff --git a/mm/page_alloc.c b/mm/page_alloc.c
> index 780c8f023b28..fe80055ea59c 100644
> --- a/mm/page_alloc.c
> +++ b/mm/page_alloc.c
> @@ -5989,8 +5989,8 @@ overlap_memmap_init(unsigned long zone, unsigned long *pfn)
>   * done. Non-atomic initialization, single-pass.
>   */
>  void __meminit memmap_init_zone(unsigned long size, int nid, unsigned long zone,
> -               unsigned long start_pfn, enum meminit_context context,
> -               struct vmem_altmap *altmap)
> +               unsigned long start_pfn, unsigned long zone_end_pfn,
> +               enum meminit_context context, struct vmem_altmap *altmap)
>  {
>         unsigned long pfn, end_pfn = start_pfn + size;
>         struct page *page;
> @@ -6024,7 +6024,7 @@ void __meminit memmap_init_zone(unsigned long size, int nid, unsigned long zone,
>                 if (context == MEMINIT_EARLY) {
>                         if (overlap_memmap_init(zone, &pfn))
>                                 continue;
> -                       if (defer_init(nid, pfn, end_pfn))
> +                       if (defer_init(nid, pfn, zone_end_pfn))
>                                 break;
>                 }
>
> @@ -6150,7 +6150,7 @@ void __meminit __weak memmap_init(unsigned long size, int nid,
>
>                 if (end_pfn > start_pfn) {
>                         size = end_pfn - start_pfn;
> -                       memmap_init_zone(size, nid, zone, start_pfn,
> +                       memmap_init_zone(size, nid, zone, start_pfn, range_end_pfn,
>                                          MEMINIT_EARLY, NULL);
>                 }
>         }
>
>
> ------------------------
>
> We have attached the default dmesg logs, as well as dmesg logs collected
> with memblock=debug on the kernel cmdline, for both the vanilla and
> patched kernels. Let me know if you need more info.
>
>
>
> From: [email protected] <[email protected]>
> Sent: 13 October 2020 6:47 PM
> To: Rahul Gopakumar <[email protected]>
> Cc: [email protected] <[email protected]>; [email protected]
> <[email protected]>; [email protected]
> <[email protected]>; [email protected]
> <[email protected]>; [email protected]
> <[email protected]>; [email protected]
> <[email protected]>; [email protected]
> <[email protected]>; Rajender M <[email protected]>; Yiu Cho Lau
> <[email protected]>; Peter Jonasson <[email protected]>; Venkatesh
> Rajaram <[email protected]>
> Subject: Re: Performance regressions in "boot_time" tests in Linux 5.8 Kernel
>
> Hi Rahul,
>
> On 10/12/20 at 05:21pm, Rahul Gopakumar wrote:
> > Hi Baoquan,
> >
> > Attached are the dmesg logs collected with and without the
> > commit, after adding memblock=debug to the kernel cmdline.
>
> Can you test the draft patch below and see if it works for you?
>
> From a2ea6caef3c73ad9efb2dd2b48039065fe430bb2 Mon Sep 17 00:00:00 2001
> From: Baoquan He <[email protected]>
> Date: Tue, 13 Oct 2020 20:05:30 +0800
> Subject: [PATCH] mm: make memmap defer init only take effect per zone
>
> Deferred struct page init is designed to work per zone. However, since
> commit 73a6e474cb376 ("mm: memmap_init: iterate over memblock regions
> rather that check each PFN"), the handling is mistakenly done per memory
> range inside a zone. Especially in the unmovable zones of systems with
> multiple nodes, memblock reservations split a zone into many memory
> ranges. This initializes many more struct pages than expected in the
> early stage, which increases boot time considerably.
>
> Let's fix it so that memmap defer init is handled zone-wide, rather than
> per memory range within a zone.
>
> Signed-off-by: Baoquan He <[email protected]>
> ---
>  arch/ia64/mm/init.c | 4 ++--
>  include/linux/mm.h  | 5 +++--
>  mm/memory_hotplug.c | 2 +-
>  mm/page_alloc.c     | 6 +++---
>  4 files changed, 9 insertions(+), 8 deletions(-)
>
> diff --git a/arch/ia64/mm/init.c b/arch/ia64/mm/init.c
> index ef12e097f318..27ca549ff47e 100644
> --- a/arch/ia64/mm/init.c
> +++ b/arch/ia64/mm/init.c
> @@ -536,7 +536,7 @@ virtual_memmap_init(u64 start, u64 end, void *arg)
>
>         if (map_start < map_end)
>                 memmap_init_zone((unsigned long)(map_end - map_start),
> -                                args->nid, args->zone, page_to_pfn(map_start),
> +                                args->nid, args->zone, page_to_pfn(map_start), page_to_pfn(map_end),
>                                  MEMINIT_EARLY, NULL, MIGRATE_MOVABLE);
>         return 0;
>  }
> @@ -546,7 +546,7 @@ memmap_init (unsigned long size, int nid, unsigned long zone,
>              unsigned long start_pfn)
>  {
>         if (!vmem_map) {
> -               memmap_init_zone(size, nid, zone, start_pfn,
> +               memmap_init_zone(size, nid, zone, start_pfn, start_pfn + size,
>                                  MEMINIT_EARLY, NULL, MIGRATE_MOVABLE);
>         } else {
>                 struct page *start;
> diff --git a/include/linux/mm.h b/include/linux/mm.h
> index ef360fe70aaf..5f9fc61d5be2 100644
> --- a/include/linux/mm.h
> +++ b/include/linux/mm.h
> @@ -2439,8 +2439,9 @@ extern int __meminit __early_pfn_to_nid(unsigned long pfn,
>  #endif
>
>  extern void set_dma_reserve(unsigned long new_dma_reserve);
> -extern void memmap_init_zone(unsigned long, int, unsigned long, unsigned long,
> -               enum meminit_context, struct vmem_altmap *, int migratetype);
> +extern void memmap_init_zone(unsigned long, int, unsigned long,
> +               unsigned long, unsigned long, enum meminit_context,
> +               struct vmem_altmap *, int migratetype);
>  extern void setup_per_zone_wmarks(void);
>  extern int __meminit init_per_zone_wmark_min(void);
>  extern void mem_init(void);
> diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
> index b44d4c7ba73b..f9a37e6abc1c 100644
> --- a/mm/memory_hotplug.c
> +++ b/mm/memory_hotplug.c
> @@ -732,7 +732,7 @@ void __ref move_pfn_range_to_zone(struct zone *zone, unsigned long start_pfn,
>          * expects the zone spans the pfn range. All the pages in the range
>          * are reserved so nobody should be touching them so we should be safe
>          */
> -       memmap_init_zone(nr_pages, nid, zone_idx(zone), start_pfn,
> +       memmap_init_zone(nr_pages, nid, zone_idx(zone), start_pfn, 0,
>                          MEMINIT_HOTPLUG, altmap, migratetype);
>
>         set_zone_contiguous(zone);
> diff --git a/mm/page_alloc.c b/mm/page_alloc.c
> index 2ebf9ddafa3a..e8b19fdd18ec 100644
> --- a/mm/page_alloc.c
> +++ b/mm/page_alloc.c
> @@ -6044,7 +6044,7 @@ overlap_memmap_init(unsigned long zone, unsigned long *pfn)
>   * zone stats (e.g., nr_isolate_pageblock) are touched.
>   */
>  void __meminit memmap_init_zone(unsigned long size, int nid, unsigned long zone,
> -               unsigned long start_pfn,
> +               unsigned long start_pfn, unsigned long zone_end_pfn,
>                 enum meminit_context context,
>                 struct vmem_altmap *altmap, int migratetype)
>  {
> @@ -6080,7 +6080,7 @@ void __meminit memmap_init_zone(unsigned long size, int nid, unsigned long zone,
>                 if (context == MEMINIT_EARLY) {
>                         if (overlap_memmap_init(zone, &pfn))
>                                 continue;
> -                       if (defer_init(nid, pfn, end_pfn))
> +                       if (defer_init(nid, pfn, zone_end_pfn))
>                                 break;
>                 }
>
> @@ -6194,7 +6194,7 @@ void __meminit __weak memmap_init(unsigned long size, int nid,
>
>                 if (end_pfn > start_pfn) {
>                         size = end_pfn - start_pfn;
> -                       memmap_init_zone(size, nid, zone, start_pfn,
> +                       memmap_init_zone(size, nid, zone, start_pfn, range_end_pfn,
>                                          MEMINIT_EARLY, NULL, MIGRATE_MOVABLE);
>                 }
>         }
> --
> 2.17.2