Re: [PATCH] mm: use managed_zone() for more exact check in zone iteration

2018-11-20 Thread Wei Yang
On Fri, Nov 16, 2018 at 11:24:05AM +0100, Michal Hocko wrote:
>On Thu 15-11-18 07:50:40, Wei Yang wrote:
>[...]
>> @@ -1193,8 +1196,8 @@ static unsigned int count_free_highmem_pages(void)
>>  struct zone *zone;
>>  unsigned int cnt = 0;
>>  
>> -for_each_populated_zone(zone)
>> -if (is_highmem(zone))
>> +for_each_zone(zone)
>> +if (populated_zone(zone) && is_highmem(zone))
>>  cnt += zone_page_state(zone, NR_FREE_PAGES);
>
>this should be for_each_managed_zone because we only care about highmem
>zones which have pages in the allocator (NR_FREE_PAGES).
>
>>  
>>  return cnt;
>> @@ -1239,10 +1242,10 @@ static unsigned int count_highmem_pages(void)
>>  struct zone *zone;
>>  unsigned int n = 0;
>>  
>> -for_each_populated_zone(zone) {
>> +for_each_zone(zone) {
>>  unsigned long pfn, max_zone_pfn;
>>  
>> -if (!is_highmem(zone))
>> +if (!populated_zone(zone) || !is_highmem(zone))
>>  continue;
>>  
>>  mark_free_pages(zone);
>
>I am not familiar with this code much but I strongly suspect that we do
>want for_each_managed_zone here because saveable_highmem_page skips over
>all reserved pages which rules out the bootmem. But this should be
>double checked with Rafael (Cc-ed).
>
>Rafael, does this loop care about pages which are not managed by the
>page allocator?
>

Hi Rafael,

Your opinion on this change and the following one would be appreciated :-)

>> @@ -1305,8 +1308,8 @@ static unsigned int count_data_pages(void)
>>  unsigned long pfn, max_zone_pfn;
>>  unsigned int n = 0;
>>  
>> -for_each_populated_zone(zone) {
>> -if (is_highmem(zone))
>> +for_each_zone(zone) {
>> +if (!populated_zone(zone) || is_highmem(zone))
>>  continue;
>>  
>>  mark_free_pages(zone);
>> @@ -1399,9 +1402,12 @@ static void copy_data_pages(struct memory_bitmap *copy_bm,
>>  struct zone *zone;
>>  unsigned long pfn;
>>  
>> -for_each_populated_zone(zone) {
>> +for_each_zone(zone) {
>>  unsigned long max_zone_pfn;
>>  
>> +if (!populated_zone(zone))
>> +continue;
>> +
>>  mark_free_pages(zone);
>>  max_zone_pfn = zone_end_pfn(zone);
>>  for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++)
>> @@ -1717,7 +1723,10 @@ int hibernate_preallocate_memory(void)
>>  saveable += save_highmem;
>>  highmem = save_highmem;
>>  size = 0;
>> -for_each_populated_zone(zone) {
>> +for_each_zone(zone) {
>> +if (!populated_zone(zone))
>> +continue;
>> +
>>  size += snapshot_additional_pages(zone);
>>  if (is_highmem(zone))
>>  highmem += zone_page_state(zone, NR_FREE_PAGES);
>
>ditto for the above.
>
>
>> @@ -1863,8 +1872,8 @@ static int enough_free_mem(unsigned int nr_pages, unsigned int nr_highmem)
>>  struct zone *zone;
>>  unsigned int free = alloc_normal;
>>  
>> -for_each_populated_zone(zone)
>> -if (!is_highmem(zone))
>> +for_each_zone(zone)
>> +if (populated_zone(zone) && !is_highmem(zone))
>>  free += zone_page_state(zone, NR_FREE_PAGES);
>>  
>>  nr_pages += count_pages_for_highmem(nr_highmem);
>
>This one should be for_each_managed_zone (NR_FREE_PAGES)
>
>The rest looks good to me.
>-- 
>Michal Hocko
>SUSE Labs

-- 
Wei Yang
Help you, Help me


Re: [PATCH] mm: use managed_zone() for more exact check in zone iteration

2018-11-16 Thread Michal Hocko
On Fri 16-11-18 15:58:28, Wei Yang wrote:
> On Fri, Nov 16, 2018 at 12:26:03PM +0100, Michal Hocko wrote:
> >On Fri 16-11-18 12:05:04, osalvador wrote:
> >> On Fri, 2018-11-16 at 10:57 +0100, Michal Hocko wrote:
> >[...]
> >> > E.g. memory hotplug decreases both managed and present counters. I
> >> > am actually not sure that is 100% correct (put on my TODO list to
> >> > check). There is no consistency in that regard.
> >> 
> >> We can only offline non-reserved pages (so, managed pages).
> >
> >Yes
> >
> >> Since present_pages holds reserved_pages + managed_pages, decreasing
> >> both should be fine unless I am mistaken.
> >
> >Well, present_pages is defined as "physical pages existing within the zone"
> >and those pages still exist but they are offline. But as I've
> >said, I have to think about it some more.
> 
> I may not have caught up with your discussion, but I'd like to share what I
> learned.
> 
> online_pages()
> online_pages_range()
> zone->present_pages += onlined_pages;
> 
> __offline_pages()
> adjust_managed_page_count()
> zone->present_pages -= offlined_pages;
> 
> The two counters: present_pages & managed_pages would be adjusted during
> online/offline.
> 
> However, I am not sure when *reserved_pages* would be adjusted. Would we add
> this hot-added memory into memblock.reserved and allocate memory via
> memblock_alloc() after system bootup?

This is not really related to this patch. I have only mentioned the
memory hotplug as an example. I would rather focus on the change itself
so let's not get too off topic here.

-- 
Michal Hocko
SUSE Labs


Re: [PATCH] mm: use managed_zone() for more exact check in zone iteration

2018-11-16 Thread Wei Yang
On Fri, Nov 16, 2018 at 12:26:03PM +0100, Michal Hocko wrote:
>On Fri 16-11-18 12:05:04, osalvador wrote:
>> On Fri, 2018-11-16 at 10:57 +0100, Michal Hocko wrote:
>[...]
>> > E.g. memory hotplug decreases both managed and present counters. I
>> > am actually not sure that is 100% correct (put on my TODO list to
>> > check). There is no consistency in that regard.
>> 
>> We can only offline non-reserved pages (so, managed pages).
>
>Yes
>
>> Since present_pages holds reserved_pages + managed_pages, decreasing
>> both should be fine unless I am mistaken.
>
>Well, present_pages is defined as "physical pages existing within the zone"
>and those pages still exist but they are offline. But as I've
>said, I have to think about it some more.

I may not have caught up with your discussion, but I'd like to share what I
learned.

online_pages()
online_pages_range()
zone->present_pages += onlined_pages;

__offline_pages()
adjust_managed_page_count()
zone->present_pages -= offlined_pages;

The two counters: present_pages & managed_pages would be adjusted during
online/offline.

However, I am not sure when *reserved_pages* would be adjusted. Would we add
this hot-added memory into memblock.reserved and allocate memory via
memblock_alloc() after system bootup?

>-- 
>Michal Hocko
>SUSE Labs

-- 
Wei Yang
Help you, Help me


Re: [PATCH] mm: use managed_zone() for more exact check in zone iteration

2018-11-16 Thread Michal Hocko
On Fri 16-11-18 12:05:04, osalvador wrote:
> On Fri, 2018-11-16 at 10:57 +0100, Michal Hocko wrote:
[...]
> > E.g. memory hotplug decreases both managed and present counters. I
> > am actually not sure that is 100% correct (put on my TODO list to
> > check). There is no consistency in that regard.
> 
> We can only offline non-reserved pages (so, managed pages).

Yes

> Since present_pages holds reserved_pages + managed_pages, decreasing
> both should be fine unless I am mistaken.

Well, present_pages is defined as "physical pages existing within the zone"
and those pages still exist but they are offline. But as I've
said, I have to think about it some more.
-- 
Michal Hocko
SUSE Labs


Re: [PATCH] mm: use managed_zone() for more exact check in zone iteration

2018-11-16 Thread osalvador
On Fri, 2018-11-16 at 10:57 +0100, Michal Hocko wrote:
> On Thu 15-11-18 13:37:35, Andrew Morton wrote:
> [...]
> > Worse, the situations in which managed_zone() != populated_zone() are
> > rare(?), so it will take a long time for problems to be discovered, I
> > expect.
> 
> We would basically have to deplete the whole zone by the bootmem
> allocator or pull out all pages from the page allocator. E.g. memory
> hotplug decreases both managed and present counters. I am actually not
> sure that is 100% correct (put on my TODO list to check). There is no
> consistency in that regard.

We can only offline non-reserved pages (so, managed pages).
Since present_pages holds reserved_pages + managed_pages, decreasing
both should be fine unless I am mistaken.
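
(This is the relation documented in include/linux/mmzone.h:

	present_pages = spanned_pages - absent_pages(pages in holes)
	managed_pages = present_pages - reserved_pages

i.e. present_pages = managed_pages + reserved_pages.)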

Oscar Salvador


Re: [PATCH] mm: use managed_zone() for more exact check in zone iteration

2018-11-16 Thread Michal Hocko
On Thu 15-11-18 07:50:40, Wei Yang wrote:
[...]
> @@ -1193,8 +1196,8 @@ static unsigned int count_free_highmem_pages(void)
>   struct zone *zone;
>   unsigned int cnt = 0;
>  
> - for_each_populated_zone(zone)
> - if (is_highmem(zone))
> + for_each_zone(zone)
> + if (populated_zone(zone) && is_highmem(zone))
>   cnt += zone_page_state(zone, NR_FREE_PAGES);

this should be for_each_managed_zone because we only care about highmem
zones which have pages in the allocator (NR_FREE_PAGES).
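
That is, with the for_each_managed_zone() helper the patch introduces,
this hunk would simply read (a sketch, not the code as posted):

	for_each_managed_zone(zone)
		if (is_highmem(zone))
			cnt += zone_page_state(zone, NR_FREE_PAGES);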

>  
>   return cnt;
> @@ -1239,10 +1242,10 @@ static unsigned int count_highmem_pages(void)
>   struct zone *zone;
>   unsigned int n = 0;
>  
> - for_each_populated_zone(zone) {
> + for_each_zone(zone) {
>   unsigned long pfn, max_zone_pfn;
>  
> - if (!is_highmem(zone))
> + if (!populated_zone(zone) || !is_highmem(zone))
>   continue;
>  
>   mark_free_pages(zone);

I am not familiar with this code much but I strongly suspect that we do
want for_each_managed_zone here because saveable_highmem_page skips over
all reserved pages which rules out the bootmem. But this should be
double checked with Rafael (Cc-ed).

Rafael, does this loop care about pages which are not managed by the
page allocator?
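
For reference, the check in question, paraphrased from
kernel/power/snapshot.c (a sketch; the real function also performs swsusp
forbidden/free checks omitted here):

	static struct page *saveable_highmem_page(struct zone *zone, unsigned long pfn)
	{
		struct page *page;

		if (!pfn_valid(pfn))
			return NULL;

		page = pfn_to_page(pfn);
		if (page_zone(page) != zone)
			return NULL;

		BUG_ON(!PageHighMem(page));

		if (PageReserved(page))	/* rules out bootmem pages */
			return NULL;

		return page;
	}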

> @@ -1305,8 +1308,8 @@ static unsigned int count_data_pages(void)
>   unsigned long pfn, max_zone_pfn;
>   unsigned int n = 0;
>  
> - for_each_populated_zone(zone) {
> - if (is_highmem(zone))
> + for_each_zone(zone) {
> + if (!populated_zone(zone) || is_highmem(zone))
>   continue;
>  
>   mark_free_pages(zone);
> @@ -1399,9 +1402,12 @@ static void copy_data_pages(struct memory_bitmap *copy_bm,
>   struct zone *zone;
>   unsigned long pfn;
>  
> - for_each_populated_zone(zone) {
> + for_each_zone(zone) {
>   unsigned long max_zone_pfn;
>  
> + if (!populated_zone(zone))
> + continue;
> +
>   mark_free_pages(zone);
>   max_zone_pfn = zone_end_pfn(zone);
>   for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++)
> @@ -1717,7 +1723,10 @@ int hibernate_preallocate_memory(void)
>   saveable += save_highmem;
>   highmem = save_highmem;
>   size = 0;
> - for_each_populated_zone(zone) {
> + for_each_zone(zone) {
> + if (!populated_zone(zone))
> + continue;
> +
>   size += snapshot_additional_pages(zone);
>   if (is_highmem(zone))
>   highmem += zone_page_state(zone, NR_FREE_PAGES);

ditto for the above.


> @@ -1863,8 +1872,8 @@ static int enough_free_mem(unsigned int nr_pages, unsigned int nr_highmem)
>   struct zone *zone;
>   unsigned int free = alloc_normal;
>  
> - for_each_populated_zone(zone)
> - if (!is_highmem(zone))
> + for_each_zone(zone)
> + if (populated_zone(zone) && !is_highmem(zone))
>   free += zone_page_state(zone, NR_FREE_PAGES);
>  
>   nr_pages += count_pages_for_highmem(nr_highmem);

This one should be for_each_managed_zone (NR_FREE_PAGES)
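
i.e., roughly (sketch):

	for_each_managed_zone(zone)
		if (!is_highmem(zone))
			free += zone_page_state(zone, NR_FREE_PAGES);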

The rest looks good to me.
-- 
Michal Hocko
SUSE Labs


Re: [PATCH] mm: use managed_zone() for more exact check in zone iteration

2018-11-16 Thread Michal Hocko
On Thu 15-11-18 13:37:35, Andrew Morton wrote:
[...]
> Worse, the situations in which managed_zone() != populated_zone() are
> rare(?), so it will take a long time for problems to be discovered, I
> expect.

We would basically have to deplete the whole zone by the bootmem
allocator or pull out all pages from the page allocator. E.g. memory
hotplug decreases both managed and present counters. I am actually not
sure that is 100% correct (put on my TODO list to check). There is no
consistency in that regard.

That being said, I will review the patch (today hopefully) but
fundamentally most users should indeed care about managed pages when
iterating zones with memory. There should be a good reason why they
might want to look at reserved pages.

-- 
Michal Hocko
SUSE Labs


Re: [PATCH] mm: use managed_zone() for more exact check in zone iteration

2018-11-15 Thread Wei Yang
On Thu, Nov 15, 2018 at 01:37:35PM -0800, Andrew Morton wrote:
>On Thu, 15 Nov 2018 07:50:40 +0800 Wei Yang  wrote:
>
>> For each zone, there are three counters that describe its space range:
>> 
>> spanned_pages
>> present_pages
>> managed_pages
>> 
>> Their detailed meanings are documented in include/linux/mmzone.h. This
>> patch concerns the last two.
>> 
>> present_pages is physical pages existing within the zone
>> managed_pages is present pages managed by the buddy system
>> 
>> From the definition, managed_pages is a stricter condition than
>> present_pages.
>> 
>> There are two functions using zone's present_pages as a boundary:
>> 
>> populated_zone()
>> for_each_populated_zone()
>> 
>> Going through the kernel tree, most of their users want to access pages
>> managed by the buddy system, which means it is more exact to check the
>> zone's managed_pages for validation.
>> 
>> This patch replaces those checks on present_pages with managed_pages by:
>> 
>> * change for_each_populated_zone() to for_each_managed_zone()
>> * convert for_each_populated_zone() to for_each_zone() and check
>>   populated_zone() where necessary
>> * change populated_zone() to managed_zone() at proper places
>> 
>> Signed-off-by: Wei Yang 
>> 
>> ---
>> 
>> Michal, after the last mail, I did one more thing: I replaced
>> populated_zone() with managed_zone() at the proper places.
>> 
>> One thing I am not sure about is the places in mm/compaction.c. I have
>> changed them; if that is not correct, please let me know.
>> 
>> BTW, I did a boot-up test with the patched kernel and it looks smooth.
>
>Seems sensible, but a bit scary.  A basic boot test is unlikely to
>expose subtle gremlins.
>

Agree.

>Worse, the situations in which managed_zone() != populated_zone() are
>rare(?), so it will take a long time for problems to be discovered, I
>expect.

Hmm... I created a virtual machine with 4 nodes, which has 6 populated
zones in total. In all of them, managed_pages differs from present_pages.

This is a little bit unexpected to me.
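
(/proc/zoneinfo reports both the "present" and "managed" counters for
every zone, so the difference between the two is easy to inspect on a
live system.)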

>
>I'll toss it in there for now, let's see who breaks :(

Thanks.

-- 
Wei Yang
Help you, Help me


Re: [PATCH] mm: use managed_zone() for more exact check in zone iteration

2018-11-15 Thread Andrew Morton
On Thu, 15 Nov 2018 07:50:40 +0800 Wei Yang  wrote:

> For each zone, there are three counters that describe its space range:
> 
> spanned_pages
> present_pages
> managed_pages
> 
> Their detailed meanings are documented in include/linux/mmzone.h. This
> patch concerns the last two.
> 
> present_pages is physical pages existing within the zone
> managed_pages is present pages managed by the buddy system
> 
> From the definition, managed_pages is a stricter condition than
> present_pages.
> 
> There are two functions using zone's present_pages as a boundary:
> 
> populated_zone()
> for_each_populated_zone()
> 
> Going through the kernel tree, most of their users want to access pages
> managed by the buddy system, which means it is more exact to check the
> zone's managed_pages for validation.
> 
> This patch replaces those checks on present_pages with managed_pages by:
> 
> * change for_each_populated_zone() to for_each_managed_zone()
> * convert for_each_populated_zone() to for_each_zone() and check
>   populated_zone() where necessary
> * change populated_zone() to managed_zone() at proper places
> 
> Signed-off-by: Wei Yang 
> 
> ---
> 
> Michal, after the last mail, I did one more thing: I replaced
> populated_zone() with managed_zone() at the proper places.
> 
> One thing I am not sure about is the places in mm/compaction.c. I have
> changed them; if that is not correct, please let me know.
> 
> BTW, I did a boot-up test with the patched kernel and it looks smooth.

Seems sensible, but a bit scary.  A basic boot test is unlikely to
expose subtle gremlins.

Worse, the situations in which managed_zone() != populated_zone() are
rare(?), so it will take a long time for problems to be discovered, I
expect.

I'll toss it in there for now, let's see who breaks :(


[PATCH] mm: use managed_zone() for more exact check in zone iteration

2018-11-14 Thread Wei Yang
For each zone, there are three counters that describe its space range:

spanned_pages
present_pages
managed_pages

Their detailed meanings are documented in include/linux/mmzone.h. This
patch concerns the last two.

present_pages is physical pages existing within the zone
managed_pages is present pages managed by the buddy system

From the definition, managed_pages is a stricter condition than
present_pages.
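
For reference, the two checks boil down to these helpers (a sketch of the
include/linux/mmzone.h definitions at the time of this patch):

	/* Returns true if a zone has pages managed by the buddy allocator. */
	static inline bool managed_zone(struct zone *zone)
	{
		return zone->managed_pages;
	}

	/* Returns true if a zone has memory. */
	static inline bool populated_zone(struct zone *zone)
	{
		return zone->present_pages;
	}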

There are two functions using zone's present_pages as a boundary:

populated_zone()
for_each_populated_zone()

Going through the kernel tree, most of their users want to access pages
managed by the buddy system, which means it is more exact to check the
zone's managed_pages for validation.

This patch replaces those checks on present_pages with managed_pages by:

* change for_each_populated_zone() to for_each_managed_zone()
* convert for_each_populated_zone() to for_each_zone() and check
  populated_zone() where necessary
* change populated_zone() to managed_zone() at proper places

Signed-off-by: Wei Yang 

---

Michal, after the last mail, I did one more thing: I replaced
populated_zone() with managed_zone() at the proper places.

One thing I am not sure about is the places in mm/compaction.c. I have
changed them; if that is not correct, please let me know.

BTW, I did a boot-up test with the patched kernel and it looks smooth.
---
 arch/s390/mm/page-states.c |  2 +-
 include/linux/mmzone.h     |  8 +++-
 kernel/power/snapshot.c    | 31 ---
 mm/compaction.c            |  8 
 mm/highmem.c               |  5 ++---
 mm/huge_memory.c           |  2 +-
 mm/khugepaged.c            |  2 +-
 mm/madvise.c               |  2 +-
 mm/migrate.c               |  2 +-
 mm/page-writeback.c        |  4 ++--
 mm/page_alloc.c            | 19 +--
 mm/vmstat.c                | 14 +++---
 12 files changed, 52 insertions(+), 47 deletions(-)

diff --git a/arch/s390/mm/page-states.c b/arch/s390/mm/page-states.c
index dc3cede7f2ec..015430bf0c63 100644
--- a/arch/s390/mm/page-states.c
+++ b/arch/s390/mm/page-states.c
@@ -265,7 +265,7 @@ void arch_set_page_states(int make_stable)
return;
if (make_stable)
drain_local_pages(NULL);
-   for_each_populated_zone(zone) {
+   for_each_managed_zone(zone) {
spin_lock_irqsave(&zone->lock, flags);
for_each_migratetype_order(order, t) {
list_for_each(l, &zone->free_area[order].free_list[t]) {
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 847705a6d0ec..2174baba0546 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -937,11 +937,9 @@ extern struct zone *next_zone(struct zone *zone);
 zone;  \
 zone = next_zone(zone))
 
-#define for_each_populated_zone(zone)  \
-   for (zone = (first_online_pgdat())->node_zones; \
-zone;  \
-zone = next_zone(zone))\
-   if (!populated_zone(zone))  \
+#define for_each_managed_zone(zone)\
+   for_each_zone(zone) \
+   if (!managed_zone(zone))\
; /* do nothing */  \
else
 
diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c
index b0308a2c6000..aa99efa73d89 100644
--- a/kernel/power/snapshot.c
+++ b/kernel/power/snapshot.c
@@ -592,10 +592,13 @@ static int create_mem_extents(struct list_head *list, gfp_t gfp_mask)
 
INIT_LIST_HEAD(list);
 
-   for_each_populated_zone(zone) {
+   for_each_zone(zone) {
unsigned long zone_start, zone_end;
struct mem_extent *ext, *cur, *aux;
 
+   if (!populated_zone(zone))
+   continue;
+
zone_start = zone->zone_start_pfn;
zone_end = zone_end_pfn(zone);
 
@@ -1193,8 +1196,8 @@ static unsigned int count_free_highmem_pages(void)
struct zone *zone;
unsigned int cnt = 0;
 
-   for_each_populated_zone(zone)
-   if (is_highmem(zone))
+   for_each_zone(zone)
+   if (populated_zone(zone) && is_highmem(zone))
cnt += zone_page_state(zone, NR_FREE_PAGES);
 
return cnt;
@@ -1239,10 +1242,10 @@ static unsigned int count_highmem_pages(void)
struct zone *zone;
unsigned int n = 0;
 
-   for_each_populated_zone(zone) {
+   for_each_zone(zone) {
unsigned long pfn, max_zone_pfn;
 
-   if (!is_highmem(zone))
+   if (!populated_zone(zone) || !is_highmem(zone))
continue;
 
mark_free_pages(zone);
@@ -1305,8 +1308,8 @@ static unsigned int count_data_pages(void)
unsigned long pfn, max_zone_pfn;
unsigned int n = 0;
 
- 
