Re: [PATCH] mm: use managed_zone() for more exact check in zone iteration
On Fri, Nov 16, 2018 at 11:24:05AM +0100, Michal Hocko wrote: >On Thu 15-11-18 07:50:40, Wei Yang wrote: >[...] >> @@ -1193,8 +1196,8 @@ static unsigned int count_free_highmem_pages(void) >> struct zone *zone; >> unsigned int cnt = 0; >> >> -for_each_populated_zone(zone) >> -if (is_highmem(zone)) >> +for_each_zone(zone) >> +if (populated_zone(zone) && is_highmem(zone)) >> cnt += zone_page_state(zone, NR_FREE_PAGES); > >this should be for_each_managed_zone because we only care about highmem >zones which have pages in the allocator (NR_FREE_PAGES). > >> >> return cnt; >> @@ -1239,10 +1242,10 @@ static unsigned int count_highmem_pages(void) >> struct zone *zone; >> unsigned int n = 0; >> >> -for_each_populated_zone(zone) { >> +for_each_zone(zone) { >> unsigned long pfn, max_zone_pfn; >> >> -if (!is_highmem(zone)) >> +if (!populated_zone(zone) || !is_highmem(zone)) >> continue; >> >> mark_free_pages(zone); > >I am not familiar with this code much but I strongly suspect that we do >want for_each_managed_zone here because saveable_highmem_page skips over >all reserved pages which rules out the bootmem. But this should be >double checked with Rafael (Cc-ed). > >Rafael, does this loop care about pages which are not managed by the >page allocator? 
> Hi, Rafael Your opinion on this change and the following one is appreciated :-) >> @@ -1305,8 +1308,8 @@ static unsigned int count_data_pages(void) >> unsigned long pfn, max_zone_pfn; >> unsigned int n = 0; >> >> -for_each_populated_zone(zone) { >> -if (is_highmem(zone)) >> +for_each_zone(zone) { >> +if (!populated_zone(zone) || is_highmem(zone)) >> continue; >> >> mark_free_pages(zone); >> @@ -1399,9 +1402,12 @@ static void copy_data_pages(struct memory_bitmap >> *copy_bm, >> struct zone *zone; >> unsigned long pfn; >> >> -for_each_populated_zone(zone) { >> +for_each_zone(zone) { >> unsigned long max_zone_pfn; >> >> +if (!populated_zone(zone)) >> +continue; >> + >> mark_free_pages(zone); >> max_zone_pfn = zone_end_pfn(zone); >> for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++) >> @@ -1717,7 +1723,10 @@ int hibernate_preallocate_memory(void) >> saveable += save_highmem; >> highmem = save_highmem; >> size = 0; >> -for_each_populated_zone(zone) { >> +for_each_zone(zone) { >> +if (!populated_zone(zone)) >> +continue; >> + >> size += snapshot_additional_pages(zone); >> if (is_highmem(zone)) >> highmem += zone_page_state(zone, NR_FREE_PAGES); > >ditto for the above. > > >> @@ -1863,8 +1872,8 @@ static int enough_free_mem(unsigned int nr_pages, >> unsigned int nr_highmem) >> struct zone *zone; >> unsigned int free = alloc_normal; >> >> -for_each_populated_zone(zone) >> -if (!is_highmem(zone)) >> +for_each_zone(zone) >> +if (populated_zone(zone) && !is_highmem(zone)) >> free += zone_page_state(zone, NR_FREE_PAGES); >> >> nr_pages += count_pages_for_highmem(nr_highmem); > >This one should be for_each_managed_zone (NR_FREE_PAGES) > >The rest looks good to me. >-- >Michal Hocko >SUSE Labs -- Wei Yang Help you, Help me
Re: [PATCH] mm: use managed_zone() for more exact check in zone iteration
On Fri, Nov 16, 2018 at 11:24:05AM +0100, Michal Hocko wrote: >On Thu 15-11-18 07:50:40, Wei Yang wrote: >[...] >> @@ -1193,8 +1196,8 @@ static unsigned int count_free_highmem_pages(void) >> struct zone *zone; >> unsigned int cnt = 0; >> >> -for_each_populated_zone(zone) >> -if (is_highmem(zone)) >> +for_each_zone(zone) >> +if (populated_zone(zone) && is_highmem(zone)) >> cnt += zone_page_state(zone, NR_FREE_PAGES); > >this should be for_each_managed_zone because we only care about highmem >zones which have pages in the allocator (NR_FREE_PAGES). > >> >> return cnt; >> @@ -1239,10 +1242,10 @@ static unsigned int count_highmem_pages(void) >> struct zone *zone; >> unsigned int n = 0; >> >> -for_each_populated_zone(zone) { >> +for_each_zone(zone) { >> unsigned long pfn, max_zone_pfn; >> >> -if (!is_highmem(zone)) >> +if (!populated_zone(zone) || !is_highmem(zone)) >> continue; >> >> mark_free_pages(zone); > >I am not familiar with this code much but I strongly suspect that we do >want for_each_managed_zone here because saveable_highmem_page skips over >all reserved pages which rules out the bootmem. But this should be >double checked with Rafael (Cc-ed). > >Rafael, does this loop care about pages which are not managed by the >page allocator? 
> Hi, Rafael Your opinion on this change and the following one is appreciated :-) >> @@ -1305,8 +1308,8 @@ static unsigned int count_data_pages(void) >> unsigned long pfn, max_zone_pfn; >> unsigned int n = 0; >> >> -for_each_populated_zone(zone) { >> -if (is_highmem(zone)) >> +for_each_zone(zone) { >> +if (!populated_zone(zone) || is_highmem(zone)) >> continue; >> >> mark_free_pages(zone); >> @@ -1399,9 +1402,12 @@ static void copy_data_pages(struct memory_bitmap >> *copy_bm, >> struct zone *zone; >> unsigned long pfn; >> >> -for_each_populated_zone(zone) { >> +for_each_zone(zone) { >> unsigned long max_zone_pfn; >> >> +if (!populated_zone(zone)) >> +continue; >> + >> mark_free_pages(zone); >> max_zone_pfn = zone_end_pfn(zone); >> for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++) >> @@ -1717,7 +1723,10 @@ int hibernate_preallocate_memory(void) >> saveable += save_highmem; >> highmem = save_highmem; >> size = 0; >> -for_each_populated_zone(zone) { >> +for_each_zone(zone) { >> +if (!populated_zone(zone)) >> +continue; >> + >> size += snapshot_additional_pages(zone); >> if (is_highmem(zone)) >> highmem += zone_page_state(zone, NR_FREE_PAGES); > >ditto for the above. > > >> @@ -1863,8 +1872,8 @@ static int enough_free_mem(unsigned int nr_pages, >> unsigned int nr_highmem) >> struct zone *zone; >> unsigned int free = alloc_normal; >> >> -for_each_populated_zone(zone) >> -if (!is_highmem(zone)) >> +for_each_zone(zone) >> +if (populated_zone(zone) && !is_highmem(zone)) >> free += zone_page_state(zone, NR_FREE_PAGES); >> >> nr_pages += count_pages_for_highmem(nr_highmem); > >This one should be for_each_managed_zone (NR_FREE_PAGES) > >The rest looks good to me. >-- >Michal Hocko >SUSE Labs -- Wei Yang Help you, Help me
Re: [PATCH] mm: use managed_zone() for more exact check in zone iteration
On Fri 16-11-18 15:58:28, Wei Yang wrote: > On Fri, Nov 16, 2018 at 12:26:03PM +0100, Michal Hocko wrote: > >On Fri 16-11-18 12:05:04, osalvador wrote: > >> On Fri, 2018-11-16 at 10:57 +0100, Michal Hocko wrote: > >[...] > >> > E.g. memory hotplug decreases both managed and present counters. I > >> > am actually not sure that is 100% correct (put on my TODO list to > >> > check). There is no consistency in that regards. > >> > >> We can only offline non-reserved pages (so, managed pages). > > > >Yes > > > >> Since present pages holds reserved_pages + managed_pages, decreasing > >> both should be fine unless I am mistaken. > > > >Well, present_pages is defined as "physical pages existing within the zone" > >and those pages are still existing but they are offline. But as I've > >said I have to think about it some more > > I may not catch up with your discussions, while I'd like to share what I > learnt. > > online_pages() > online_pages_range() > zone->present_pages += onlined_pages; > > __offline_pages() > adjust_managed_page_count() > zone->present_pages -= offlined_pages; > > The two counters: present_pages & managed_pages would be adjusted during > online/offline. > > While I am not sure when *reserved_pages* would be adjusted. Will we add > this hot-added memory into memblock.reserved? and allocate memory by > memblock_alloc() after system bootup? This is not really related to this patch. I have only mentioned the memory hotplug as an example. I would rather focus on the change itself so let's not get too off topic here. -- Michal Hocko SUSE Labs
Re: [PATCH] mm: use managed_zone() for more exact check in zone iteration
On Fri 16-11-18 15:58:28, Wei Yang wrote: > On Fri, Nov 16, 2018 at 12:26:03PM +0100, Michal Hocko wrote: > >On Fri 16-11-18 12:05:04, osalvador wrote: > >> On Fri, 2018-11-16 at 10:57 +0100, Michal Hocko wrote: > >[...] > >> > E.g. memory hotplug decreases both managed and present counters. I > >> > am actually not sure that is 100% correct (put on my TODO list to > >> > check). There is no consistency in that regards. > >> > >> We can only offline non-reserved pages (so, managed pages). > > > >Yes > > > >> Since present pages holds reserved_pages + managed_pages, decreasing > >> both should be fine unless I am mistaken. > > > >Well, present_pages is defined as "physical pages existing within the zone" > >and those pages are still existing but they are offline. But as I've > >said I have to think about it some more > > I may not catch up with your discussions, while I'd like to share what I > learnt. > > online_pages() > online_pages_range() > zone->present_pages += onlined_pages; > > __offline_pages() > adjust_managed_page_count() > zone->present_pages -= offlined_pages; > > The two counters: present_pages & managed_pages would be adjusted during > online/offline. > > While I am not sure when *reserved_pages* would be adjusted. Will we add > this hot-added memory into memblock.reserved? and allocate memory by > memblock_alloc() after system bootup? This is not really related to this patch. I have only mentioned the memory hotplug as an example. I would rather focus on the change itself so let's not get too off topic here. -- Michal Hocko SUSE Labs
Re: [PATCH] mm: use managed_zone() for more exact check in zone iteration
On Fri, Nov 16, 2018 at 12:26:03PM +0100, Michal Hocko wrote: >On Fri 16-11-18 12:05:04, osalvador wrote: >> On Fri, 2018-11-16 at 10:57 +0100, Michal Hocko wrote: >[...] >> > E.g. memory hotplug decreases both managed and present counters. I >> > am actually not sure that is 100% correct (put on my TODO list to >> > check). There is no consistency in that regards. >> >> We can only offline non-reserved pages (so, managed pages). > >Yes > >> Since present pages holds reserved_pages + managed_pages, decreasing >> both should be fine unless I am mistaken. > >Well, present_pages is defined as "physical pages existing within the zone" >and those pages are still existing but they are offline. But as I've >said I have to think about it some more I may not catch up with your discussions, while I'd like to share what I learnt. online_pages() online_pages_range() zone->present_pages += onlined_pages; __offline_pages() adjust_managed_page_count() zone->present_pages -= offlined_pages; The two counters: present_pages & managed_pages would be adjusted during online/offline. While I am not sure when *reserved_pages* would be adjusted. Will we add this hot-added memory into memblock.reserved? and allocate memory by memblock_alloc() after system bootup? >-- >Michal Hocko >SUSE Labs -- Wei Yang Help you, Help me
Re: [PATCH] mm: use managed_zone() for more exact check in zone iteration
On Fri, Nov 16, 2018 at 12:26:03PM +0100, Michal Hocko wrote: >On Fri 16-11-18 12:05:04, osalvador wrote: >> On Fri, 2018-11-16 at 10:57 +0100, Michal Hocko wrote: >[...] >> > E.g. memory hotplug decreases both managed and present counters. I >> > am actually not sure that is 100% correct (put on my TODO list to >> > check). There is no consistency in that regards. >> >> We can only offline non-reserved pages (so, managed pages). > >Yes > >> Since present pages holds reserved_pages + managed_pages, decreasing >> both should be fine unless I am mistaken. > >Well, present_pages is defined as "physical pages existing within the zone" >and those pages are still existing but they are offline. But as I've >said I have to think about it some more I may not catch up with your discussions, while I'd like to share what I learnt. online_pages() online_pages_range() zone->present_pages += onlined_pages; __offline_pages() adjust_managed_page_count() zone->present_pages -= offlined_pages; The two counters: present_pages & managed_pages would be adjusted during online/offline. While I am not sure when *reserved_pages* would be adjusted. Will we add this hot-added memory into memblock.reserved? and allocate memory by memblock_alloc() after system bootup? >-- >Michal Hocko >SUSE Labs -- Wei Yang Help you, Help me
Re: [PATCH] mm: use managed_zone() for more exact check in zone iteration
On Fri 16-11-18 12:05:04, osalvador wrote: > On Fri, 2018-11-16 at 10:57 +0100, Michal Hocko wrote: [...] > > E.g. memory hotplug decreases both managed and present counters. I > > am actually not sure that is 100% correct (put on my TODO list to > > check). There is no consistency in that regards. > > We can only offline non-reserved pages (so, managed pages). Yes > Since present pages holds reserved_pages + managed_pages, decreasing > both should be fine unless I am mistaken. Well, present_pages is defined as "physical pages existing within the zone" and those pages are still existing but they are offline. But as I've said I have to think about it some more -- Michal Hocko SUSE Labs
Re: [PATCH] mm: use managed_zone() for more exact check in zone iteration
On Fri 16-11-18 12:05:04, osalvador wrote: > On Fri, 2018-11-16 at 10:57 +0100, Michal Hocko wrote: [...] > > E.g. memory hotplug decreases both managed and present counters. I > > am actually not sure that is 100% correct (put on my TODO list to > > check). There is no consistency in that regards. > > We can only offline non-reserved pages (so, managed pages). Yes > Since present pages holds reserved_pages + managed_pages, decreasing > both should be fine unless I am mistaken. Well, present_pages is defined as "physical pages existing within the zone" and those pages are still existing but they are offline. But as I've said I have to think about it some more -- Michal Hocko SUSE Labs
Re: [PATCH] mm: use managed_zone() for more exact check in zone iteration
On Fri, 2018-11-16 at 10:57 +0100, Michal Hocko wrote: > On Thu 15-11-18 13:37:35, Andrew Morton wrote: > [...] > > Worse, the situations in which managed_zone() != populated_zone() > > are > > rare(?), so it will take a long time for problems to be discovered, > > I > > expect. > > We would basically have to deplete the whole zone by the bootmem > allocator or pull out all pages from the page allocator. E.g. memory > hotplug decreases both managed and present counters. I am actually > not > sure that is 100% correct (put on my TODO list to check). There is no > consistency in that regards. We can only offline non-reserved pages (so, managed pages). Since present pages holds reserved_pages + managed_pages, decreasing both should be fine unless I am mistaken. Oscar Salvador
Re: [PATCH] mm: use managed_zone() for more exact check in zone iteration
On Fri, 2018-11-16 at 10:57 +0100, Michal Hocko wrote: > On Thu 15-11-18 13:37:35, Andrew Morton wrote: > [...] > > Worse, the situations in which managed_zone() != populated_zone() > > are > > rare(?), so it will take a long time for problems to be discovered, > > I > > expect. > > We would basically have to deplete the whole zone by the bootmem > allocator or pull out all pages from the page allocator. E.g. memory > hotplug decreases both managed and present counters. I am actually > not > sure that is 100% correct (put on my TODO list to check). There is no > consistency in that regards. We can only offline non-reserved pages (so, managed pages). Since present pages holds reserved_pages + managed_pages, decreasing both should be fine unless I am mistaken. Oscar Salvador
Re: [PATCH] mm: use managed_zone() for more exact check in zone iteration
On Thu 15-11-18 07:50:40, Wei Yang wrote: [...] > @@ -1193,8 +1196,8 @@ static unsigned int count_free_highmem_pages(void) > struct zone *zone; > unsigned int cnt = 0; > > - for_each_populated_zone(zone) > - if (is_highmem(zone)) > + for_each_zone(zone) > + if (populated_zone(zone) && is_highmem(zone)) > cnt += zone_page_state(zone, NR_FREE_PAGES); this should be for_each_managed_zone because we only care about highmem zones which have pages in the allocator (NR_FREE_PAGES). > > return cnt; > @@ -1239,10 +1242,10 @@ static unsigned int count_highmem_pages(void) > struct zone *zone; > unsigned int n = 0; > > - for_each_populated_zone(zone) { > + for_each_zone(zone) { > unsigned long pfn, max_zone_pfn; > > - if (!is_highmem(zone)) > + if (!populated_zone(zone) || !is_highmem(zone)) > continue; > > mark_free_pages(zone); I am not familiar with this code much but I strongly suspect that we do want for_each_managed_zone here because saveable_highmem_page skips over all reserved pages which rules out the bootmem. But this should be double checked with Rafael (Cc-ed). Rafael, does this loop care about pages which are not managed by the page allocator? 
> @@ -1305,8 +1308,8 @@ static unsigned int count_data_pages(void) > unsigned long pfn, max_zone_pfn; > unsigned int n = 0; > > - for_each_populated_zone(zone) { > - if (is_highmem(zone)) > + for_each_zone(zone) { > + if (!populated_zone(zone) || is_highmem(zone)) > continue; > > mark_free_pages(zone); > @@ -1399,9 +1402,12 @@ static void copy_data_pages(struct memory_bitmap > *copy_bm, > struct zone *zone; > unsigned long pfn; > > - for_each_populated_zone(zone) { > + for_each_zone(zone) { > unsigned long max_zone_pfn; > > + if (!populated_zone(zone)) > + continue; > + > mark_free_pages(zone); > max_zone_pfn = zone_end_pfn(zone); > for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++) > @@ -1717,7 +1723,10 @@ int hibernate_preallocate_memory(void) > saveable += save_highmem; > highmem = save_highmem; > size = 0; > - for_each_populated_zone(zone) { > + for_each_zone(zone) { > + if (!populated_zone(zone)) > + continue; > + > size += snapshot_additional_pages(zone); > if (is_highmem(zone)) > highmem += zone_page_state(zone, NR_FREE_PAGES); ditto for the above. > @@ -1863,8 +1872,8 @@ static int enough_free_mem(unsigned int nr_pages, > unsigned int nr_highmem) > struct zone *zone; > unsigned int free = alloc_normal; > > - for_each_populated_zone(zone) > - if (!is_highmem(zone)) > + for_each_zone(zone) > + if (populated_zone(zone) && !is_highmem(zone)) > free += zone_page_state(zone, NR_FREE_PAGES); > > nr_pages += count_pages_for_highmem(nr_highmem); This one should be for_each_managed_zone (NR_FREE_PAGES) The rest looks good to me. -- Michal Hocko SUSE Labs
Re: [PATCH] mm: use managed_zone() for more exact check in zone iteration
On Thu 15-11-18 07:50:40, Wei Yang wrote: [...] > @@ -1193,8 +1196,8 @@ static unsigned int count_free_highmem_pages(void) > struct zone *zone; > unsigned int cnt = 0; > > - for_each_populated_zone(zone) > - if (is_highmem(zone)) > + for_each_zone(zone) > + if (populated_zone(zone) && is_highmem(zone)) > cnt += zone_page_state(zone, NR_FREE_PAGES); this should be for_each_managed_zone because we only care about highmem zones which have pages in the allocator (NR_FREE_PAGES). > > return cnt; > @@ -1239,10 +1242,10 @@ static unsigned int count_highmem_pages(void) > struct zone *zone; > unsigned int n = 0; > > - for_each_populated_zone(zone) { > + for_each_zone(zone) { > unsigned long pfn, max_zone_pfn; > > - if (!is_highmem(zone)) > + if (!populated_zone(zone) || !is_highmem(zone)) > continue; > > mark_free_pages(zone); I am not familiar with this code much but I strongly suspect that we do want for_each_managed_zone here because saveable_highmem_page skips over all reserved pages which rules out the bootmem. But this should be double checked with Rafael (Cc-ed). Rafael, does this loop care about pages which are not managed by the page allocator? 
> @@ -1305,8 +1308,8 @@ static unsigned int count_data_pages(void) > unsigned long pfn, max_zone_pfn; > unsigned int n = 0; > > - for_each_populated_zone(zone) { > - if (is_highmem(zone)) > + for_each_zone(zone) { > + if (!populated_zone(zone) || is_highmem(zone)) > continue; > > mark_free_pages(zone); > @@ -1399,9 +1402,12 @@ static void copy_data_pages(struct memory_bitmap > *copy_bm, > struct zone *zone; > unsigned long pfn; > > - for_each_populated_zone(zone) { > + for_each_zone(zone) { > unsigned long max_zone_pfn; > > + if (!populated_zone(zone)) > + continue; > + > mark_free_pages(zone); > max_zone_pfn = zone_end_pfn(zone); > for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++) > @@ -1717,7 +1723,10 @@ int hibernate_preallocate_memory(void) > saveable += save_highmem; > highmem = save_highmem; > size = 0; > - for_each_populated_zone(zone) { > + for_each_zone(zone) { > + if (!populated_zone(zone)) > + continue; > + > size += snapshot_additional_pages(zone); > if (is_highmem(zone)) > highmem += zone_page_state(zone, NR_FREE_PAGES); ditto for the above. > @@ -1863,8 +1872,8 @@ static int enough_free_mem(unsigned int nr_pages, > unsigned int nr_highmem) > struct zone *zone; > unsigned int free = alloc_normal; > > - for_each_populated_zone(zone) > - if (!is_highmem(zone)) > + for_each_zone(zone) > + if (populated_zone(zone) && !is_highmem(zone)) > free += zone_page_state(zone, NR_FREE_PAGES); > > nr_pages += count_pages_for_highmem(nr_highmem); This one should be for_each_managed_zone (NR_FREE_PAGES) The rest looks good to me. -- Michal Hocko SUSE Labs
Re: [PATCH] mm: use managed_zone() for more exact check in zone iteration
On Thu 15-11-18 13:37:35, Andrew Morton wrote: [...] > Worse, the situations in which managed_zone() != populated_zone() are > rare(?), so it will take a long time for problems to be discovered, I > expect. We would basically have to deplete the whole zone by the bootmem allocator or pull out all pages from the page allocator. E.g. memory hotplug decreases both managed and present counters. I am actually not sure that is 100% correct (put on my TODO list to check). There is no consistency in that regards. That being said, I will review the patch (today hopefully) but fundamentally most users should indeed care about managed pages when iterating zones with memory. There should be a good reason why they might want to look at reserved pages. -- Michal Hocko SUSE Labs
Re: [PATCH] mm: use managed_zone() for more exact check in zone iteration
On Thu 15-11-18 13:37:35, Andrew Morton wrote: [...] > Worse, the situations in which managed_zone() != populated_zone() are > rare(?), so it will take a long time for problems to be discovered, I > expect. We would basically have to deplete the whole zone by the bootmem allocator or pull out all pages from the page allocator. E.g. memory hotplug decreases both managed and present counters. I am actually not sure that is 100% correct (put on my TODO list to check). There is no consistency in that regards. That being said, I will review the patch (today hopefully) but fundamentally most users should indeed care about managed pages when iterating zones with memory. There should be a good reason why they might want to look at reserved pages. -- Michal Hocko SUSE Labs
Re: [PATCH] mm: use managed_zone() for more exact check in zone iteration
On Thu, Nov 15, 2018 at 01:37:35PM -0800, Andrew Morton wrote: >On Thu, 15 Nov 2018 07:50:40 +0800 Wei Yang wrote: > >> For one zone, there are three digits to describe its space range: >> >> spanned_pages >> present_pages >> managed_pages >> >> The detailed meaning is written in include/linux/mmzone.h. This patch >> concerns about the last two. >> >> present_pages is physical pages existing within the zone >> managed_pages is present pages managed by the buddy system >> >> >From the definition, managed_pages is a more strict condition than >> present_pages. >> >> There are two functions using zone's present_pages as a boundary: >> >> populated_zone() >> for_each_populated_zone() >> >> By going through the kernel tree, most of their users are willing to >> access pages managed by the buddy system, which means it is more exact >> to check zone's managed_pages for a validation. >> >> This patch replaces those checks on present_pages to managed_pages by: >> >> * change for_each_populated_zone() to for_each_managed_zone() >> * convert for_each_populated_zone() to for_each_zone() and check >> populated_zone() where is necessary >> * change populated_zone() to managed_zone() at proper places >> >> Signed-off-by: Wei Yang >> >> --- >> >> Michal, after last mail, I did one more thing to replace >> populated_zone() with managed_zone() at proper places. >> >> One thing I am not sure is those places in mm/compaction.c. I have >> changed them. If not, please let me know. >> >> BTW, I did a boot up test with the patched kernel and looks smooth. > >Seems sensible, but a bit scary. A basic boot test is unlikely to >expose subtle gremlins. > Agree. >Worse, the situations in which managed_zone() != populated_zone() are >rare(?), so it will take a long time for problems to be discovered, I >expect. Hmm... I created a virtual machine with 4 nodes, which has total 6 populated zones. All of them are different. This is a little bit out of my expectation. 
> >I'll toss it in there for now, let's see who breaks :( Thanks. -- Wei Yang Help you, Help me
Re: [PATCH] mm: use managed_zone() for more exact check in zone iteration
On Thu, Nov 15, 2018 at 01:37:35PM -0800, Andrew Morton wrote: >On Thu, 15 Nov 2018 07:50:40 +0800 Wei Yang wrote: > >> For one zone, there are three digits to describe its space range: >> >> spanned_pages >> present_pages >> managed_pages >> >> The detailed meaning is written in include/linux/mmzone.h. This patch >> concerns about the last two. >> >> present_pages is physical pages existing within the zone >> managed_pages is present pages managed by the buddy system >> >> >From the definition, managed_pages is a more strict condition than >> present_pages. >> >> There are two functions using zone's present_pages as a boundary: >> >> populated_zone() >> for_each_populated_zone() >> >> By going through the kernel tree, most of their users are willing to >> access pages managed by the buddy system, which means it is more exact >> to check zone's managed_pages for a validation. >> >> This patch replaces those checks on present_pages to managed_pages by: >> >> * change for_each_populated_zone() to for_each_managed_zone() >> * convert for_each_populated_zone() to for_each_zone() and check >> populated_zone() where is necessary >> * change populated_zone() to managed_zone() at proper places >> >> Signed-off-by: Wei Yang >> >> --- >> >> Michal, after last mail, I did one more thing to replace >> populated_zone() with managed_zone() at proper places. >> >> One thing I am not sure is those places in mm/compaction.c. I have >> changed them. If not, please let me know. >> >> BTW, I did a boot up test with the patched kernel and looks smooth. > >Seems sensible, but a bit scary. A basic boot test is unlikely to >expose subtle gremlins. > Agree. >Worse, the situations in which managed_zone() != populated_zone() are >rare(?), so it will take a long time for problems to be discovered, I >expect. Hmm... I created a virtual machine with 4 nodes, which has total 6 populated zones. All of them are different. This is a little bit out of my expectation. 
> >I'll toss it in there for now, let's see who breaks :( Thanks. -- Wei Yang Help you, Help me
Re: [PATCH] mm: use managed_zone() for more exact check in zone iteration
On Thu, 15 Nov 2018 07:50:40 +0800 Wei Yang wrote: > For one zone, there are three digits to describe its space range: > > spanned_pages > present_pages > managed_pages > > The detailed meaning is written in include/linux/mmzone.h. This patch > concerns about the last two. > > present_pages is physical pages existing within the zone > managed_pages is present pages managed by the buddy system > > >From the definition, managed_pages is a more strict condition than > present_pages. > > There are two functions using zone's present_pages as a boundary: > > populated_zone() > for_each_populated_zone() > > By going through the kernel tree, most of their users are willing to > access pages managed by the buddy system, which means it is more exact > to check zone's managed_pages for a validation. > > This patch replaces those checks on present_pages to managed_pages by: > > * change for_each_populated_zone() to for_each_managed_zone() > * convert for_each_populated_zone() to for_each_zone() and check > populated_zone() where is necessary > * change populated_zone() to managed_zone() at proper places > > Signed-off-by: Wei Yang > > --- > > Michal, after last mail, I did one more thing to replace > populated_zone() with managed_zone() at proper places. > > One thing I am not sure is those places in mm/compaction.c. I have > chaged them. If not, please let me know. > > BTW, I did a boot up test with the patched kernel and looks smooth. Seems sensible, but a bit scary. A basic boot test is unlikely to expose subtle gremlins. Worse, the situations in which managed_zone() != populated_zone() are rare(?), so it will take a long time for problems to be discovered, I expect. I'll toss it in there for now, let's see who breaks :(
Re: [PATCH] mm: use managed_zone() for more exact check in zone iteration
On Thu, 15 Nov 2018 07:50:40 +0800 Wei Yang wrote: > For one zone, there are three digits to describe its space range: > > spanned_pages > present_pages > managed_pages > > The detailed meaning is written in include/linux/mmzone.h. This patch > concerns about the last two. > > present_pages is physical pages existing within the zone > managed_pages is present pages managed by the buddy system > > >From the definition, managed_pages is a more strict condition than > present_pages. > > There are two functions using zone's present_pages as a boundary: > > populated_zone() > for_each_populated_zone() > > By going through the kernel tree, most of their users are willing to > access pages managed by the buddy system, which means it is more exact > to check zone's managed_pages for a validation. > > This patch replaces those checks on present_pages to managed_pages by: > > * change for_each_populated_zone() to for_each_managed_zone() > * convert for_each_populated_zone() to for_each_zone() and check > populated_zone() where is necessary > * change populated_zone() to managed_zone() at proper places > > Signed-off-by: Wei Yang > > --- > > Michal, after last mail, I did one more thing to replace > populated_zone() with managed_zone() at proper places. > > One thing I am not sure is those places in mm/compaction.c. I have > chaged them. If not, please let me know. > > BTW, I did a boot up test with the patched kernel and looks smooth. Seems sensible, but a bit scary. A basic boot test is unlikely to expose subtle gremlins. Worse, the situations in which managed_zone() != populated_zone() are rare(?), so it will take a long time for problems to be discovered, I expect. I'll toss it in there for now, let's see who breaks :(
[PATCH] mm: use managed_zone() for more exact check in zone iteration
For one zone, there are three digits to describe its space range: spanned_pages present_pages managed_pages The detailed meaning is written in include/linux/mmzone.h. This patch concerns the last two. present_pages is physical pages existing within the zone managed_pages is present pages managed by the buddy system >From the definition, managed_pages is a more strict condition than present_pages. There are two functions using zone's present_pages as a boundary: populated_zone() for_each_populated_zone() By going through the kernel tree, most of their users are willing to access pages managed by the buddy system, which means it is more exact to check zone's managed_pages for a validation. This patch replaces those checks on present_pages to managed_pages by: * change for_each_populated_zone() to for_each_managed_zone() * convert for_each_populated_zone() to for_each_zone() and check populated_zone() where is necessary * change populated_zone() to managed_zone() at proper places Signed-off-by: Wei Yang --- Michal, after last mail, I did one more thing to replace populated_zone() with managed_zone() at proper places. One thing I am not sure is those places in mm/compaction.c. I have changed them. If not, please let me know. BTW, I did a boot up test with the patched kernel and looks smooth. 
--- arch/s390/mm/page-states.c | 2 +- include/linux/mmzone.h | 8 +++- kernel/power/snapshot.c| 31 --- mm/compaction.c| 8 mm/highmem.c | 5 ++--- mm/huge_memory.c | 2 +- mm/khugepaged.c| 2 +- mm/madvise.c | 2 +- mm/migrate.c | 2 +- mm/page-writeback.c| 4 ++-- mm/page_alloc.c| 19 +-- mm/vmstat.c| 14 +++--- 12 files changed, 52 insertions(+), 47 deletions(-) diff --git a/arch/s390/mm/page-states.c b/arch/s390/mm/page-states.c index dc3cede7f2ec..015430bf0c63 100644 --- a/arch/s390/mm/page-states.c +++ b/arch/s390/mm/page-states.c @@ -265,7 +265,7 @@ void arch_set_page_states(int make_stable) return; if (make_stable) drain_local_pages(NULL); - for_each_populated_zone(zone) { + for_each_managed_zone(zone) { spin_lock_irqsave(&zone->lock, flags); for_each_migratetype_order(order, t) { list_for_each(l, &zone->free_area[order].free_list[t]) { diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 847705a6d0ec..2174baba0546 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -937,11 +937,9 @@ extern struct zone *next_zone(struct zone *zone); zone; \ zone = next_zone(zone)) -#define for_each_populated_zone(zone) \ - for (zone = (first_online_pgdat())->node_zones; \ -zone; \ -zone = next_zone(zone))\ - if (!populated_zone(zone)) \ +#define for_each_managed_zone(zone)\ + for_each_zone(zone) \ + if (!managed_zone(zone))\ ; /* do nothing */ \ else diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c index b0308a2c6000..aa99efa73d89 100644 --- a/kernel/power/snapshot.c +++ b/kernel/power/snapshot.c @@ -592,10 +592,13 @@ static int create_mem_extents(struct list_head *list, gfp_t gfp_mask) INIT_LIST_HEAD(list); - for_each_populated_zone(zone) { + for_each_zone(zone) { unsigned long zone_start, zone_end; struct mem_extent *ext, *cur, *aux; + if (!populated_zone(zone)) + continue; + zone_start = zone->zone_start_pfn; zone_end = zone_end_pfn(zone); @@ -1193,8 +1196,8 @@ static unsigned int count_free_highmem_pages(void) struct zone *zone; unsigned int cnt = 
0; - for_each_populated_zone(zone) - if (is_highmem(zone)) + for_each_zone(zone) + if (populated_zone(zone) && is_highmem(zone)) cnt += zone_page_state(zone, NR_FREE_PAGES); return cnt; @@ -1239,10 +1242,10 @@ static unsigned int count_highmem_pages(void) struct zone *zone; unsigned int n = 0; - for_each_populated_zone(zone) { + for_each_zone(zone) { unsigned long pfn, max_zone_pfn; - if (!is_highmem(zone)) + if (!populated_zone(zone) || !is_highmem(zone)) continue; mark_free_pages(zone); @@ -1305,8 +1308,8 @@ static unsigned int count_data_pages(void) unsigned long pfn, max_zone_pfn; unsigned int n = 0; -
[PATCH] mm: use managed_zone() for more exact check in zone iteration
For one zone, there are three digits to describe its space range: spanned_pages present_pages managed_pages The detailed meaning is written in include/linux/mmzone.h. This patch concerns about the last two. present_pages is physical pages existing within the zone managed_pages is present pages managed by the buddy system From the definition, managed_pages is a more strict condition than present_pages. There are two functions using zone's present_pages as a boundary: populated_zone() for_each_populated_zone() By going through the kernel tree, most of their users are willing to access pages managed by the buddy system, which means it is more exact to check zone's managed_pages for a validation. This patch replaces those checks on present_pages to managed_pages by: * change for_each_populated_zone() to for_each_managed_zone() * convert for_each_populated_zone() to for_each_zone() and check populated_zone() where is necessary * change populated_zone() to managed_zone() at proper places Signed-off-by: Wei Yang --- Michal, after last mail, I did one more thing to replace populated_zone() with managed_zone() at proper places. One thing I am not sure is those places in mm/compaction.c. I have changed them. If not, please let me know. BTW, I did a boot up test with the patched kernel and looks smooth. 
--- arch/s390/mm/page-states.c | 2 +- include/linux/mmzone.h | 8 +++- kernel/power/snapshot.c| 31 --- mm/compaction.c| 8 mm/highmem.c | 5 ++--- mm/huge_memory.c | 2 +- mm/khugepaged.c| 2 +- mm/madvise.c | 2 +- mm/migrate.c | 2 +- mm/page-writeback.c| 4 ++-- mm/page_alloc.c| 19 +-- mm/vmstat.c| 14 +++--- 12 files changed, 52 insertions(+), 47 deletions(-) diff --git a/arch/s390/mm/page-states.c b/arch/s390/mm/page-states.c index dc3cede7f2ec..015430bf0c63 100644 --- a/arch/s390/mm/page-states.c +++ b/arch/s390/mm/page-states.c @@ -265,7 +265,7 @@ void arch_set_page_states(int make_stable) return; if (make_stable) drain_local_pages(NULL); - for_each_populated_zone(zone) { + for_each_managed_zone(zone) { spin_lock_irqsave(&zone->lock, flags); for_each_migratetype_order(order, t) { list_for_each(l, &zone->free_area[order].free_list[t]) { diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 847705a6d0ec..2174baba0546 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -937,11 +937,9 @@ extern struct zone *next_zone(struct zone *zone); zone; \ zone = next_zone(zone)) -#define for_each_populated_zone(zone) \ - for (zone = (first_online_pgdat())->node_zones; \ -zone; \ -zone = next_zone(zone))\ - if (!populated_zone(zone)) \ +#define for_each_managed_zone(zone)\ + for_each_zone(zone) \ + if (!managed_zone(zone))\ ; /* do nothing */ \ else diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c index b0308a2c6000..aa99efa73d89 100644 --- a/kernel/power/snapshot.c +++ b/kernel/power/snapshot.c @@ -592,10 +592,13 @@ static int create_mem_extents(struct list_head *list, gfp_t gfp_mask) INIT_LIST_HEAD(list); - for_each_populated_zone(zone) { + for_each_zone(zone) { unsigned long zone_start, zone_end; struct mem_extent *ext, *cur, *aux; + if (!populated_zone(zone)) + continue; + zone_start = zone->zone_start_pfn; zone_end = zone_end_pfn(zone); @@ -1193,8 +1196,8 @@ static unsigned int count_free_highmem_pages(void) struct zone *zone; unsigned int cnt = 
0; - for_each_populated_zone(zone) - if (is_highmem(zone)) + for_each_zone(zone) + if (populated_zone(zone) && is_highmem(zone)) cnt += zone_page_state(zone, NR_FREE_PAGES); return cnt; @@ -1239,10 +1242,10 @@ static unsigned int count_highmem_pages(void) struct zone *zone; unsigned int n = 0; - for_each_populated_zone(zone) { + for_each_zone(zone) { unsigned long pfn, max_zone_pfn; - if (!is_highmem(zone)) + if (!populated_zone(zone) || !is_highmem(zone)) continue; mark_free_pages(zone); @@ -1305,8 +1308,8 @@ static unsigned int count_data_pages(void) unsigned long pfn, max_zone_pfn; unsigned int n = 0; -