Re: [PATCH v6 5/7] ASoC: fsl_asrc: Move common definition to fsl_asrc_common

2020-04-12 Thread Nicolin Chen
On Mon, Apr 13, 2020 at 11:16:31AM +0800, Shengjiu Wang wrote:
> On Sun, Apr 12, 2020 at 10:08 AM Nicolin Chen  wrote:
> >
> > On Sat, Apr 11, 2020 at 01:49:43PM +0800, Shengjiu Wang wrote:
> >
> > > > > diff --git a/sound/soc/fsl/fsl_asrc_dma.c 
> > > > > b/sound/soc/fsl/fsl_asrc_dma.c
> > > > > index b15946e03380..5cf0468ce6e3 100644
> > > > > --- a/sound/soc/fsl/fsl_asrc_dma.c
> > > > > +++ b/sound/soc/fsl/fsl_asrc_dma.c
> > > >
> > > > > @@ -311,11 +311,12 @@ static int fsl_asrc_dma_startup(struct 
> > > > > snd_soc_component *component,
> > > > >   return ret;
> > > > >   }
> > > > >
> > > > > - pair = kzalloc(sizeof(struct fsl_asrc_pair), GFP_KERNEL);
> > > > > + pair = kzalloc(sizeof(struct fsl_asrc_pair) + PAIR_PRIVAT_SIZE, 
> > > > > GFP_KERNEL);
> > > >
> > > > If we only use the PAIR_PRIVATE_SIZE here, maybe we can put the
> > > > define in this file too, rather than in the header file.
> > > >
> > > > And could fit 80 characters:
> > > >
> > > > +   pair = kzalloc(sizeof(*pair) + PAIR_PRIVAT_SIZE, GFP_KERNEL);
> >
> > > I will use a function pointer
> > > int (*get_pair_priv_size)(void)
> >
> > Since it's the size of pair or cts structure, could be just a
> > size_t variable?
> 
> Yes, should be "size_t (*get_pair_priv_size)(void)"

Does it have to be a function? -- how about this:

struct pair {
...
size_t private_size;
void *private;
};

probe/or-somewhere() {
...
pair->private = pair_priv;
pair->private_size = sizeof(*pair_priv);
...
}


Re: [PATCH] papr/scm: Add bad memory ranges to nvdimm bad ranges

2020-04-12 Thread kbuild test robot
Hi Santosh,

Thank you for the patch! Yet something to improve:

[auto build test ERROR on powerpc/next]
[also build test ERROR on v5.7-rc1 next-20200412]
[if your patch is applied to the wrong git tree, please drop us a note to help
improve the system. BTW, we also suggest to use '--base' option to specify the
base tree in git format-patch, please see https://stackoverflow.com/a/37406982]

url:
https://github.com/0day-ci/linux/commits/Santosh-Sivaraj/papr-scm-Add-bad-memory-ranges-to-nvdimm-bad-ranges/20200401-171233
base:   https://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux.git next
config: powerpc-allyesconfig (attached as .config)
compiler: powerpc64-linux-gcc (GCC) 9.3.0
reproduce:
wget 
https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O 
~/bin/make.cross
chmod +x ~/bin/make.cross
# save the attached .config to linux build tree
GCC_VERSION=9.3.0 make.cross ARCH=powerpc 

If you fix the issue, kindly add following tag as appropriate
Reported-by: kbuild test robot 

All errors (new ones prefixed by >>):

   arch/powerpc/platforms/pseries/papr_scm.c: In function 'papr_scm_init':
>> arch/powerpc/platforms/pseries/papr_scm.c:584:3: error: implicit declaration 
>> of function 'mce_register_notifier'; did you mean 'bus_register_notifier'? 
>> [-Werror=implicit-function-declaration]
 584 |   mce_register_notifier(&mce_ue_nb);
 |   ^
 |   bus_register_notifier
   arch/powerpc/platforms/pseries/papr_scm.c: In function 'papr_scm_exit':
>> arch/powerpc/platforms/pseries/papr_scm.c:592:2: error: implicit declaration 
>> of function 'mce_unregister_notifier'; did you mean 
>> 'bus_unregister_notifier'? [-Werror=implicit-function-declaration]
 592 |  mce_unregister_notifier(&mce_ue_nb);
 |  ^~~
 |  bus_unregister_notifier
   cc1: some warnings being treated as errors

vim +584 arch/powerpc/platforms/pseries/papr_scm.c

   577  
   578  static int __init papr_scm_init(void)
   579  {
   580  int ret;
   581  
   582  ret = platform_driver_register(&papr_scm_driver);
   583  if (!ret)
 > 584  mce_register_notifier(&mce_ue_nb);
   585  
   586  return ret;
   587  }
   588  module_init(papr_scm_init);
   589  
   590  static void __exit papr_scm_exit(void)
   591  {
 > 592  mce_unregister_notifier(&mce_ue_nb);
   593  platform_driver_unregister(&papr_scm_driver);
   594  }
   595  module_exit(papr_scm_exit);
   596  

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-...@lists.01.org


.config.gz
Description: application/gzip


Re: [PATCH v6 5/7] ASoC: fsl_asrc: Move common definition to fsl_asrc_common

2020-04-12 Thread Shengjiu Wang
On Sun, Apr 12, 2020 at 10:08 AM Nicolin Chen  wrote:
>
> On Sat, Apr 11, 2020 at 01:49:43PM +0800, Shengjiu Wang wrote:
>
> > > > diff --git a/sound/soc/fsl/fsl_asrc_dma.c b/sound/soc/fsl/fsl_asrc_dma.c
> > > > index b15946e03380..5cf0468ce6e3 100644
> > > > --- a/sound/soc/fsl/fsl_asrc_dma.c
> > > > +++ b/sound/soc/fsl/fsl_asrc_dma.c
> > >
> > > > @@ -311,11 +311,12 @@ static int fsl_asrc_dma_startup(struct 
> > > > snd_soc_component *component,
> > > >   return ret;
> > > >   }
> > > >
> > > > - pair = kzalloc(sizeof(struct fsl_asrc_pair), GFP_KERNEL);
> > > > + pair = kzalloc(sizeof(struct fsl_asrc_pair) + PAIR_PRIVAT_SIZE, 
> > > > GFP_KERNEL);
> > >
> > > If we only use the PAIR_PRIVATE_SIZE here, maybe we can put the
> > > define in this file too, rather than in the header file.
> > >
> > > And could fit 80 characters:
> > >
> > > +   pair = kzalloc(sizeof(*pair) + PAIR_PRIVAT_SIZE, GFP_KERNEL);
>
> > I will use a function pointer
> > int (*get_pair_priv_size)(void)
>
> Since it's the size of pair or cts structure, could be just a
> size_t variable?

Yes, should be "size_t (*get_pair_priv_size)(void)"

best regards
wang shengjiu


Re: [PATCH v5 0/6] implement KASLR for powerpc/fsl_booke/64

2020-04-12 Thread Jason Yan

ping...

在 2020/3/30 10:20, Jason Yan 写道:

This is a try to implement KASLR for Freescale BookE64 which is based on
my earlier implementation for Freescale BookE32:
https://patchwork.ozlabs.org/project/linuxppc-dev/list/?series=131718&state=*

The implementation for Freescale BookE64 is similar to that for BookE32. One
difference is that Freescale BookE64 sets up a TLB mapping of 1G during
booting. Another difference is that ppc64 needs the kernel to be
64K-aligned. So we can randomize the kernel in this 1G mapping and make
it 64K-aligned. This saves some code that would otherwise create another
TLB mapping at early boot. The disadvantage is that we only have about
1G/64K = 16384 slots to put the kernel in.

 KERNELBASE

   64K |--> kernel <--|
|  |  |
 +--+--+--++--+--+--+--+--+--+--+--+--++--+--+
 |  |  |  ||  |  |  |  |  |  |  |  |  ||  |  |
 +--+--+--++--+--+--+--+--+--+--+--+--++--+--+
 | |1G
 |->   offset<-|

   kernstart_virt_addr

I'm not sure whether the number of slots is sufficient or whether the design
has any defects. If you have any better ideas, I would be happy to hear them.

Thank you all.

v4->v5:
   Fix "-Werror=maybe-uninitialized" compile error.
   Fix typo "similar as" -> "similar to".
v3->v4:
   Do not define __kaslr_offset as a fixed symbol. Reference __run_at_load and
 __kaslr_offset by symbol instead of magic offsets.
   Use IS_ENABLED(CONFIG_PPC32) instead of #ifdef CONFIG_PPC32.
   Change kaslr-booke32 to kaslr-booke in index.rst
   Switch some instructions to 64-bit.
v2->v3:
   Fix build error when KASLR is disabled.
v1->v2:
   Add __kaslr_offset for the secondary cpu boot up.

Jason Yan (6):
   powerpc/fsl_booke/kaslr: refactor kaslr_legal_offset() and
 kaslr_early_init()
   powerpc/fsl_booke/64: introduce reloc_kernel_entry() helper
   powerpc/fsl_booke/64: implement KASLR for fsl_booke64
   powerpc/fsl_booke/64: do not clear the BSS for the second pass
   powerpc/fsl_booke/64: clear the original kernel if randomized
   powerpc/fsl_booke/kaslr: rename kaslr-booke32.rst to kaslr-booke.rst
 and add 64bit part

  Documentation/powerpc/index.rst   |  2 +-
  .../{kaslr-booke32.rst => kaslr-booke.rst}| 35 ++-
  arch/powerpc/Kconfig  |  2 +-
  arch/powerpc/kernel/exceptions-64e.S  | 23 +
  arch/powerpc/kernel/head_64.S | 13 +++
  arch/powerpc/kernel/setup_64.c|  3 +
  arch/powerpc/mm/mmu_decl.h| 23 +++--
  arch/powerpc/mm/nohash/kaslr_booke.c  | 91 +--
  8 files changed, 147 insertions(+), 45 deletions(-)
  rename Documentation/powerpc/{kaslr-booke32.rst => kaslr-booke.rst} (59%)





[PATCH] KVM: PPC: Book3S: Remove unneeded NULL check before kfree()

2020-04-12 Thread Alex Dewar
kfree() already checks for NULL arguments, so this check is redundant.
Remove it.

Signed-off-by: Alex Dewar 
---
 arch/powerpc/kvm/book3s_hv_nested.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/arch/powerpc/kvm/book3s_hv_nested.c 
b/arch/powerpc/kvm/book3s_hv_nested.c
index dc97e5be76f61..cad324312040b 100644
--- a/arch/powerpc/kvm/book3s_hv_nested.c
+++ b/arch/powerpc/kvm/book3s_hv_nested.c
@@ -1416,8 +1416,7 @@ static long int __kvmhv_nested_page_fault(struct kvm_run 
*run,
rmapp = &memslot->arch.rmap[gfn - memslot->base_gfn];
ret = kvmppc_create_pte(kvm, gp->shadow_pgtable, pte, n_gpa, level,
mmu_seq, gp->shadow_lpid, rmapp, &n_rmap);
-   if (n_rmap)
-   kfree(n_rmap);
+   kfree(n_rmap);
if (ret == -EAGAIN)
ret = RESUME_GUEST; /* Let the guest try again */

--
2.26.0



[PATCH 21/21] docs/vm: update memory-models documentation

2020-04-12 Thread Mike Rapoport
From: Mike Rapoport 

to reflect the updates to free_area_init() family of functions.

Signed-off-by: Mike Rapoport 
---
 Documentation/vm/memory-model.rst | 9 -
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/Documentation/vm/memory-model.rst 
b/Documentation/vm/memory-model.rst
index 58a12376b7df..91228044ed16 100644
--- a/Documentation/vm/memory-model.rst
+++ b/Documentation/vm/memory-model.rst
@@ -46,11 +46,10 @@ maps the entire physical memory. For most architectures, 
the holes
 have entries in the `mem_map` array. The `struct page` objects
 corresponding to the holes are never fully initialized.
 
-To allocate the `mem_map` array, architecture specific setup code
-should call :c:func:`free_area_init_node` function or its convenience
-wrapper :c:func:`free_area_init`. Yet, the mappings array is not
-usable until the call to :c:func:`memblock_free_all` that hands all
-the memory to the page allocator.
+To allocate the `mem_map` array, architecture specific setup code should
+call :c:func:`free_area_init` function. Yet, the mappings array is not
+usable until the call to :c:func:`memblock_free_all` that hands all the
+memory to the page allocator.
 
 If an architecture enables `CONFIG_ARCH_HAS_HOLES_MEMORYMODEL` option,
 it may free parts of the `mem_map` array that do not cover the
-- 
2.25.1



[PATCH 20/21] mm: simplify find_min_pfn_with_active_regions()

2020-04-12 Thread Mike Rapoport
From: Mike Rapoport 

The find_min_pfn_with_active_regions() calls find_min_pfn_for_node() with
nid parameter set to MAX_NUMNODES. This makes the find_min_pfn_for_node()
traverse all memblock memory regions although the first PFN in the system
can be easily found with memblock_start_of_DRAM().

Use memblock_start_of_DRAM() in find_min_pfn_with_active_regions() and drop
now unused find_min_pfn_for_node().

Signed-off-by: Mike Rapoport 
---
 mm/page_alloc.c | 20 +---
 1 file changed, 1 insertion(+), 19 deletions(-)

diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 9af27ee784c7..e83f28d6074a 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -7071,24 +7071,6 @@ unsigned long __init node_map_pfn_alignment(void)
return ~accl_mask + 1;
 }
 
-/* Find the lowest pfn for a node */
-static unsigned long __init find_min_pfn_for_node(int nid)
-{
-   unsigned long min_pfn = ULONG_MAX;
-   unsigned long start_pfn;
-   int i;
-
-   for_each_mem_pfn_range(i, nid, &start_pfn, NULL, NULL)
-   min_pfn = min(min_pfn, start_pfn);
-
-   if (min_pfn == ULONG_MAX) {
-   pr_warn("Could not find start_pfn for node %d\n", nid);
-   return 0;
-   }
-
-   return min_pfn;
-}
-
 /**
  * find_min_pfn_with_active_regions - Find the minimum PFN registered
  *
@@ -7097,7 +7079,7 @@ static unsigned long __init find_min_pfn_for_node(int nid)
  */
 unsigned long __init find_min_pfn_with_active_regions(void)
 {
-   return find_min_pfn_for_node(MAX_NUMNODES);
+   return PHYS_PFN(memblock_start_of_DRAM());
 }
 
 /*
-- 
2.25.1



[PATCH 19/21] mm: clean up free_area_init_node() and its helpers

2020-04-12 Thread Mike Rapoport
From: Mike Rapoport 

The free_area_init_node() now always uses memblock info and the zone PFN
limits so it does not need the backwards compatibility functions to
calculate the zone spanned and absent pages. The removal of the compat_
versions of zone_{absent,spanned}_pages_in_node(), in turn, makes the
zones_size and zholes_size parameters unused.

The node_start_pfn is determined by get_pfn_range_for_nid(), so there is no
need to pass it to free_area_init_node().

As a result, the only required parameter to free_area_init_node() is the
node ID; all the rest are removed along with the no longer used
compat_zone_{absent,spanned}_pages_in_node() helpers.

Signed-off-by: Mike Rapoport 
---
 mm/page_alloc.c | 104 ++--
 1 file changed, 22 insertions(+), 82 deletions(-)

diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index e46232ec4849..9af27ee784c7 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -6441,8 +6441,7 @@ static unsigned long __init 
zone_spanned_pages_in_node(int nid,
unsigned long node_start_pfn,
unsigned long node_end_pfn,
unsigned long *zone_start_pfn,
-   unsigned long *zone_end_pfn,
-   unsigned long *ignored)
+   unsigned long *zone_end_pfn)
 {
unsigned long zone_low = arch_zone_lowest_possible_pfn[zone_type];
unsigned long zone_high = arch_zone_highest_possible_pfn[zone_type];
@@ -6506,8 +6505,7 @@ unsigned long __init absent_pages_in_range(unsigned long 
start_pfn,
 static unsigned long __init zone_absent_pages_in_node(int nid,
unsigned long zone_type,
unsigned long node_start_pfn,
-   unsigned long node_end_pfn,
-   unsigned long *ignored)
+   unsigned long node_end_pfn)
 {
unsigned long zone_low = arch_zone_lowest_possible_pfn[zone_type];
unsigned long zone_high = arch_zone_highest_possible_pfn[zone_type];
@@ -6554,43 +6552,9 @@ static unsigned long __init 
zone_absent_pages_in_node(int nid,
return nr_absent;
 }
 
-static inline unsigned long __init compat_zone_spanned_pages_in_node(int nid,
-   unsigned long zone_type,
-   unsigned long node_start_pfn,
-   unsigned long node_end_pfn,
-   unsigned long *zone_start_pfn,
-   unsigned long *zone_end_pfn,
-   unsigned long *zones_size)
-{
-   unsigned int zone;
-
-   *zone_start_pfn = node_start_pfn;
-   for (zone = 0; zone < zone_type; zone++)
-   *zone_start_pfn += zones_size[zone];
-
-   *zone_end_pfn = *zone_start_pfn + zones_size[zone_type];
-
-   return zones_size[zone_type];
-}
-
-static inline unsigned long __init compat_zone_absent_pages_in_node(int nid,
-   unsigned long zone_type,
-   unsigned long node_start_pfn,
-   unsigned long node_end_pfn,
-   unsigned long *zholes_size)
-{
-   if (!zholes_size)
-   return 0;
-
-   return zholes_size[zone_type];
-}
-
 static void __init calculate_node_totalpages(struct pglist_data *pgdat,
unsigned long node_start_pfn,
-   unsigned long node_end_pfn,
-   unsigned long *zones_size,
-   unsigned long *zholes_size,
-   bool compat)
+   unsigned long node_end_pfn)
 {
unsigned long realtotalpages = 0, totalpages = 0;
enum zone_type i;
@@ -6601,31 +6565,14 @@ static void __init calculate_node_totalpages(struct 
pglist_data *pgdat,
unsigned long spanned, absent;
unsigned long size, real_size;
 
-   if (compat) {
-   spanned = compat_zone_spanned_pages_in_node(
-   pgdat->node_id, i,
-   node_start_pfn,
-   node_end_pfn,
-   &zone_start_pfn,
-   &zone_end_pfn,
-   zones_size);
-   absent = compat_zone_absent_pages_in_node(
- 

[PATCH 18/21] mm: rename free_area_init_node() to free_area_init_memoryless_node()

2020-04-12 Thread Mike Rapoport
From: Mike Rapoport 

The free_area_init_node() is only used by x86 to initialize memory-less
nodes.
Make its name reflect this and drop all the function parameters except the
node ID, as they are zero anyway.

Signed-off-by: Mike Rapoport 
---
 arch/x86/mm/numa.c | 5 +
 include/linux/mm.h | 9 +++--
 mm/page_alloc.c| 7 ++-
 3 files changed, 6 insertions(+), 15 deletions(-)

diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c
index fe024b2ac796..8ee952038c80 100644
--- a/arch/x86/mm/numa.c
+++ b/arch/x86/mm/numa.c
@@ -737,12 +737,9 @@ void __init x86_numa_init(void)
 
 static void __init init_memory_less_node(int nid)
 {
-   unsigned long zones_size[MAX_NR_ZONES] = {0};
-   unsigned long zholes_size[MAX_NR_ZONES] = {0};
-
/* Allocate and initialize node data. Memory-less node is now online.*/
alloc_node_data(nid);
-   free_area_init_node(nid, zones_size, 0, zholes_size);
+   free_area_init_memoryless_node(nid);
 
/*
 * All zonelists will be built later in start_kernel() after per cpu
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 1c2ecb42e043..27660f6cf26e 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -2272,8 +2272,7 @@ static inline spinlock_t *pud_lock(struct mm_struct *mm, 
pud_t *pud)
 }
 
 extern void __init pagecache_init(void);
-extern void __init free_area_init_node(int nid, unsigned long * zones_size,
-   unsigned long zone_start_pfn, unsigned long *zholes_size);
+extern void __init free_area_init_memoryless_node(int nid);
 extern void free_initmem(void);
 
 /*
@@ -2345,10 +2344,8 @@ static inline unsigned long get_num_physpages(void)
 
 /*
  * Using memblock node mappings, an architecture may initialise its
- * zones, allocate the backing mem_map and account for memory holes in a more
- * architecture independent manner. This is a substitute for creating the
- * zone_sizes[] and zholes_size[] arrays and passing them to
- * free_area_init_node()
+ * zones, allocate the backing mem_map and account for memory holes in an
+ * architecture independent manner.
  *
  * An architecture is expected to register range of page frames backed by
  * physical memory with memblock_add[_node]() before calling
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 376434c7a78b..e46232ec4849 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -6979,12 +6979,9 @@ static void __init __free_area_init_node(int nid, 
unsigned long *zones_size,
free_area_init_core(pgdat);
 }
 
-void __init free_area_init_node(int nid, unsigned long *zones_size,
-   unsigned long node_start_pfn,
-   unsigned long *zholes_size)
+void __init free_area_init_memoryless_node(int nid)
 {
-   __free_area_init_node(nid, zones_size, node_start_pfn, zholes_size,
- true);
+   __free_area_init_node(nid, NULL, 0, NULL, false);
 }
 
 #if !defined(CONFIG_FLAT_NODE_MEM_MAP)
-- 
2.25.1



[PATCH 17/21] mm: free_area_init: allow defining max_zone_pfn in descending order

2020-04-12 Thread Mike Rapoport
From: Mike Rapoport 

Some architectures (e.g. ARC) have the ZONE_HIGHMEM zone below the
ZONE_NORMAL. Allowing free_area_init() to parse the max_zone_pfn array even
when it is sorted in descending order allows using free_area_init() on such
architectures.

Add top -> down traversal of max_zone_pfn array in free_area_init() and use
the latter in ARC node/zone initialization.

Signed-off-by: Mike Rapoport 
---
 arch/arc/mm/init.c | 36 +++-
 mm/page_alloc.c| 24 +++-
 2 files changed, 26 insertions(+), 34 deletions(-)

diff --git a/arch/arc/mm/init.c b/arch/arc/mm/init.c
index 0920c969c466..41eb9be1653c 100644
--- a/arch/arc/mm/init.c
+++ b/arch/arc/mm/init.c
@@ -63,11 +63,13 @@ void __init early_init_dt_add_memory_arch(u64 base, u64 
size)
 
low_mem_sz = size;
in_use = 1;
+   memblock_add_node(base, size, 0);
} else {
 #ifdef CONFIG_HIGHMEM
high_mem_start = base;
high_mem_sz = size;
in_use = 1;
+   memblock_add_node(base, size, 1);
 #endif
}
 
@@ -83,8 +85,7 @@ void __init early_init_dt_add_memory_arch(u64 base, u64 size)
  */
 void __init setup_arch_memory(void)
 {
-   unsigned long zones_size[MAX_NR_ZONES];
-   unsigned long zones_holes[MAX_NR_ZONES];
+   unsigned long max_zone_pfn[MAX_NR_ZONES] = { 0 };
 
init_mm.start_code = (unsigned long)_text;
init_mm.end_code = (unsigned long)_etext;
@@ -115,7 +116,6 @@ void __init setup_arch_memory(void)
 * the crash
 */
 
-   memblock_add_node(low_mem_start, low_mem_sz, 0);
memblock_reserve(CONFIG_LINUX_LINK_BASE,
 __pa(_end) - CONFIG_LINUX_LINK_BASE);
 
@@ -133,22 +133,7 @@ void __init setup_arch_memory(void)
memblock_dump_all();
 
/*- node/zones setup --*/
-   memset(zones_size, 0, sizeof(zones_size));
-   memset(zones_holes, 0, sizeof(zones_holes));
-
-   zones_size[ZONE_NORMAL] = max_low_pfn - min_low_pfn;
-   zones_holes[ZONE_NORMAL] = 0;
-
-   /*
-* We can't use the helper free_area_init(zones[]) because it uses
-* PAGE_OFFSET to compute the @min_low_pfn which would be wrong
-* when our kernel doesn't start at PAGE_OFFSET, i.e.
-* PAGE_OFFSET != CONFIG_LINUX_RAM_BASE
-*/
-   free_area_init_node(0,  /* node-id */
-   zones_size, /* num pages per zone */
-   min_low_pfn,/* first pfn of node */
-   zones_holes);   /* holes */
+   max_zone_pfn[ZONE_NORMAL] = max_low_pfn;
 
 #ifdef CONFIG_HIGHMEM
/*
@@ -168,20 +153,13 @@ void __init setup_arch_memory(void)
min_high_pfn = PFN_DOWN(high_mem_start);
max_high_pfn = PFN_DOWN(high_mem_start + high_mem_sz);
 
-   zones_size[ZONE_NORMAL] = 0;
-   zones_holes[ZONE_NORMAL] = 0;
-
-   zones_size[ZONE_HIGHMEM] = max_high_pfn - min_high_pfn;
-   zones_holes[ZONE_HIGHMEM] = 0;
-
-   free_area_init_node(1,  /* node-id */
-   zones_size, /* num pages per zone */
-   min_high_pfn,   /* first pfn of node */
-   zones_holes);   /* holes */
+   max_zone_pfn[ZONE_HIGHMEM] = max_high_pfn;
 
high_memory = (void *)(min_high_pfn << PAGE_SHIFT);
kmap_init();
 #endif
+
+   free_area_init(max_zone_pfn);
 }
 
 /*
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 343d87b8697d..376434c7a78b 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -7429,7 +7429,8 @@ static void check_for_memory(pg_data_t *pgdat, int nid)
 void __init free_area_init(unsigned long *max_zone_pfn)
 {
unsigned long start_pfn, end_pfn;
-   int i, nid;
+   int i, nid, zone;
+   bool descending = false;
 
/* Record where the zone boundaries are */
memset(arch_zone_lowest_possible_pfn, 0,
@@ -7439,13 +7440,26 @@ void __init free_area_init(unsigned long *max_zone_pfn)
 
start_pfn = find_min_pfn_with_active_regions();
 
+   /*
+* Some architectures, e.g. ARC may have ZONE_HIGHMEM below
+* ZONE_NORMAL. For such cases we allow max_zone_pfn sorted in the
+* descending order
+*/
+   if (MAX_NR_ZONES > 1 && max_zone_pfn[0] > max_zone_pfn[1])
+   descending = true;
+
for (i = 0; i < MAX_NR_ZONES; i++) {
-   if (i == ZONE_MOVABLE)
+   if (descending)
+   zone = MAX_NR_ZONES - i - 1;
+   else
+   zone = i;
+
+   if (zone == ZONE_MOVABLE)
continue;
 
-   end_pfn = max(max_zone_pfn[i], start_pfn);
-   arch_zone_lowest_possible_pfn[i] = start_pfn;
-   arch_zone_highest_possible_pfn[i] = end_pfn;
+  

[PATCH 16/21] mm: remove early_pfn_in_nid() and CONFIG_NODES_SPAN_OTHER_NODES

2020-04-12 Thread Mike Rapoport
From: Mike Rapoport 

The commit f47ac088c406 ("mm: memmap_init: iterate over memblock regions
rather that check each PFN") made early_pfn_in_nid() obsolete and since
CONFIG_NODES_SPAN_OTHER_NODES is only used to pick a stub or a real
implementation of early_pfn_in_nid() it is also not needed anymore.

Remove both early_pfn_in_nid() and the CONFIG_NODES_SPAN_OTHER_NODES.

Co-developed-by: Hoan Tran 
Signed-off-by: Hoan Tran 
Signed-off-by: Mike Rapoport 
---
 arch/powerpc/Kconfig |  9 -
 arch/sparc/Kconfig   |  9 -
 arch/x86/Kconfig |  9 -
 mm/page_alloc.c  | 20 
 4 files changed, 47 deletions(-)

diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 5f86b22b7d2c..74f316deeae1 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -685,15 +685,6 @@ config ARCH_MEMORY_PROBE
def_bool y
depends on MEMORY_HOTPLUG
 
-# Some NUMA nodes have memory ranges that span
-# other nodes.  Even though a pfn is valid and
-# between a node's start and end pfns, it may not
-# reside on that node.  See memmap_init_zone()
-# for details.
-config NODES_SPAN_OTHER_NODES
-   def_bool y
-   depends on NEED_MULTIPLE_NODES
-
 config STDBINUTILS
bool "Using standard binutils settings"
depends on 44x
diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig
index 795206b7b552..0e4f3891b904 100644
--- a/arch/sparc/Kconfig
+++ b/arch/sparc/Kconfig
@@ -286,15 +286,6 @@ config NODES_SHIFT
  Specify the maximum number of NUMA Nodes available on the target
  system.  Increases memory reserved to accommodate various tables.
 
-# Some NUMA nodes have memory ranges that span
-# other nodes.  Even though a pfn is valid and
-# between a node's start and end pfns, it may not
-# reside on that node.  See memmap_init_zone()
-# for details.
-config NODES_SPAN_OTHER_NODES
-   def_bool y
-   depends on NEED_MULTIPLE_NODES
-
 config ARCH_SPARSEMEM_ENABLE
def_bool y if SPARC64
select SPARSEMEM_VMEMMAP_ENABLE
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 9d3e95b4fb85..37dac095659e 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -1581,15 +1581,6 @@ config X86_64_ACPI_NUMA
---help---
  Enable ACPI SRAT based node topology detection.
 
-# Some NUMA nodes have memory ranges that span
-# other nodes.  Even though a pfn is valid and
-# between a node's start and end pfns, it may not
-# reside on that node.  See memmap_init_zone()
-# for details.
-config NODES_SPAN_OTHER_NODES
-   def_bool y
-   depends on X86_64_ACPI_NUMA
-
 config NUMA_EMU
bool "NUMA emulation"
depends on NUMA
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index c43ce8709457..343d87b8697d 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1541,26 +1541,6 @@ int __meminit early_pfn_to_nid(unsigned long pfn)
 }
 #endif /* CONFIG_NEED_MULTIPLE_NODES */
 
-#ifdef CONFIG_NODES_SPAN_OTHER_NODES
-/* Only safe to use early in boot when initialisation is single-threaded */
-static inline bool __meminit early_pfn_in_nid(unsigned long pfn, int node)
-{
-   int nid;
-
-   nid = __early_pfn_to_nid(pfn, &early_pfnnid_cache);
-   if (nid >= 0 && nid != node)
-   return false;
-   return true;
-}
-
-#else
-static inline bool __meminit early_pfn_in_nid(unsigned long pfn, int node)
-{
-   return true;
-}
-#endif
-
-
 void __init memblock_free_pages(struct page *page, unsigned long pfn,
unsigned int order)
 {
-- 
2.25.1



[PATCH 15/21] mm: memmap_init: iterate over memblock regions rather that check each PFN

2020-04-12 Thread Mike Rapoport
From: Baoquan He 

When called during boot the memmap_init_zone() function checks if each PFN
is valid and actually belongs to the node being initialized using
early_pfn_valid() and early_pfn_in_nid().

Each such check may cost up to O(log(n)) where n is the number of memory
banks, so for large amounts of memory the overall time spent in early_pfn*()
becomes substantial.

Since the information is anyway present in memblock, we can iterate over
memblock memory regions in memmap_init() and only call memmap_init_zone()
for PFN ranges that are known to be valid and in the appropriate node.

Signed-off-by: Baoquan He 
Signed-off-by: Mike Rapoport 
---
 mm/page_alloc.c | 26 --
 1 file changed, 16 insertions(+), 10 deletions(-)

diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 7f6a3081edb8..c43ce8709457 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -5995,14 +5995,6 @@ void __meminit memmap_init_zone(unsigned long size, int 
nid, unsigned long zone,
 * function.  They do not exist on hotplugged memory.
 */
if (context == MEMMAP_EARLY) {
-   if (!early_pfn_valid(pfn)) {
-   pfn = next_pfn(pfn);
-   continue;
-   }
-   if (!early_pfn_in_nid(pfn, nid)) {
-   pfn++;
-   continue;
-   }
if (overlap_memmap_init(zone, &pfn))
continue;
if (defer_init(nid, pfn, end_pfn))
@@ -6118,9 +6110,23 @@ static void __meminit zone_init_free_lists(struct zone 
*zone)
 }
 
 void __meminit __weak memmap_init(unsigned long size, int nid,
- unsigned long zone, unsigned long start_pfn)
+ unsigned long zone,
+ unsigned long range_start_pfn)
 {
-   memmap_init_zone(size, nid, zone, start_pfn, MEMMAP_EARLY, NULL);
+   unsigned long start_pfn, end_pfn;
+   unsigned long range_end_pfn = range_start_pfn + size;
+   int i;
+
+   for_each_mem_pfn_range(i, nid, &start_pfn, &end_pfn, NULL) {
+   start_pfn = clamp(start_pfn, range_start_pfn, range_end_pfn);
+   end_pfn = clamp(end_pfn, range_start_pfn, range_end_pfn);
+
+   if (end_pfn > start_pfn) {
+   size = end_pfn - start_pfn;
+   memmap_init_zone(size, nid, zone, start_pfn,
+MEMMAP_EARLY, NULL);
+   }
+   }
 }
 
 static int zone_batchsize(struct zone *zone)
-- 
2.25.1



[PATCH 14/21] xtensa: simplify detection of memory zone boundaries

2020-04-12 Thread Mike Rapoport
From: Mike Rapoport 

The free_area_init() function only requires the definition of maximal PFN
for each of the supported zones rather than calculation of actual zone sizes
and the sizes of the holes between the zones.

After removal of CONFIG_HAVE_MEMBLOCK_NODE_MAP the free_area_init() is
available to all architectures.

Using this function instead of free_area_init_node() simplifies the zone
detection.

Signed-off-by: Mike Rapoport 
---
 arch/xtensa/mm/init.c | 8 
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/arch/xtensa/mm/init.c b/arch/xtensa/mm/init.c
index 19c625e6d81f..a05b306cf371 100644
--- a/arch/xtensa/mm/init.c
+++ b/arch/xtensa/mm/init.c
@@ -70,13 +70,13 @@ void __init bootmem_init(void)
 void __init zones_init(void)
 {
/* All pages are DMA-able, so we put them all in the DMA zone. */
-   unsigned long zones_size[MAX_NR_ZONES] = {
-   [ZONE_NORMAL] = max_low_pfn - ARCH_PFN_OFFSET,
+   unsigned long max_zone_pfn[MAX_NR_ZONES] = {
+   [ZONE_NORMAL] = max_low_pfn,
 #ifdef CONFIG_HIGHMEM
-   [ZONE_HIGHMEM] = max_pfn - max_low_pfn,
+   [ZONE_HIGHMEM] = max_pfn,
 #endif
};
-   free_area_init_node(0, zones_size, ARCH_PFN_OFFSET, NULL);
+   free_area_init(max_zone_pfn);
 }
 
 #ifdef CONFIG_HIGHMEM
-- 
2.25.1



[PATCH 13/21] unicore32: simplify detection of memory zone boundaries

2020-04-12 Thread Mike Rapoport
From: Mike Rapoport 

The free_area_init() function only requires the definition of maximal PFN
for each of the supported zones rather than calculation of actual zone sizes
and the sizes of the holes between the zones.

After removal of CONFIG_HAVE_MEMBLOCK_NODE_MAP the free_area_init() is
available to all architectures.

Using this function instead of free_area_init_node() simplifies the zone
detection.

Signed-off-by: Mike Rapoport 
---
 arch/unicore32/include/asm/memory.h  |  2 +-
 arch/unicore32/include/mach/memory.h |  6 ++--
 arch/unicore32/kernel/pci.c  | 14 ++---
 arch/unicore32/mm/init.c | 43 ++--
 4 files changed, 15 insertions(+), 50 deletions(-)

diff --git a/arch/unicore32/include/asm/memory.h 
b/arch/unicore32/include/asm/memory.h
index 23c93105f98f..66285178dd9b 100644
--- a/arch/unicore32/include/asm/memory.h
+++ b/arch/unicore32/include/asm/memory.h
@@ -60,7 +60,7 @@
 #ifndef __ASSEMBLY__
 
 #ifndef arch_adjust_zones
-#define arch_adjust_zones(size, holes) do { } while (0)
+#define arch_adjust_zones(max_zone_pfn) do { } while (0)
 #endif
 
 /*
diff --git a/arch/unicore32/include/mach/memory.h 
b/arch/unicore32/include/mach/memory.h
index 2b527cedd03d..b4e6035cb9a3 100644
--- a/arch/unicore32/include/mach/memory.h
+++ b/arch/unicore32/include/mach/memory.h
@@ -25,10 +25,10 @@
 
 #if !defined(__ASSEMBLY__) && defined(CONFIG_PCI)
 
-void puv3_pci_adjust_zones(unsigned long *size, unsigned long *holes);
+void puv3_pci_adjust_zones(unsigned long *max_zone_pfn);
 
-#define arch_adjust_zones(size, holes) \
-   puv3_pci_adjust_zones(size, holes)
+#define arch_adjust_zones(max_zone_pfn) \
+   puv3_pci_adjust_zones(max_zone_pfn)
 
 #endif
 
diff --git a/arch/unicore32/kernel/pci.c b/arch/unicore32/kernel/pci.c
index efa04a94dcdb..0d098aa05b47 100644
--- a/arch/unicore32/kernel/pci.c
+++ b/arch/unicore32/kernel/pci.c
@@ -133,21 +133,11 @@ static int pci_puv3_map_irq(const struct pci_dev *dev, u8 
slot, u8 pin)
  * This is really ugly and we need a better way of specifying
  * DMA-capable regions of memory.
  */
-void __init puv3_pci_adjust_zones(unsigned long *zone_size,
-   unsigned long *zhole_size)
+void __init puv3_pci_adjust_zones(unsigned long max_zone_pfn)
 {
unsigned int sz = SZ_128M >> PAGE_SHIFT;
 
-   /*
-* Only adjust if > 128M on current system
-*/
-   if (zone_size[0] <= sz)
-   return;
-
-   zone_size[1] = zone_size[0] - sz;
-   zone_size[0] = sz;
-   zhole_size[1] = zhole_size[0];
-   zhole_size[0] = 0;
+   max_zone_pfn[ZONE_DMA] = sz;
 }
 
 /*
diff --git a/arch/unicore32/mm/init.c b/arch/unicore32/mm/init.c
index 6cf010fadc7a..52425d383cea 100644
--- a/arch/unicore32/mm/init.c
+++ b/arch/unicore32/mm/init.c
@@ -61,46 +61,21 @@ static void __init find_limits(unsigned long *min, unsigned 
long *max_low,
}
 }
 
-static void __init uc32_bootmem_free(unsigned long min, unsigned long max_low,
-   unsigned long max_high)
+static void __init uc32_bootmem_free(unsigned long max_low)
 {
-   unsigned long zone_size[MAX_NR_ZONES], zhole_size[MAX_NR_ZONES];
-   struct memblock_region *reg;
+   unsigned long max_zone_pfn[MAX_NR_ZONES] = { 0 };
 
-   /*
-* initialise the zones.
-*/
-   memset(zone_size, 0, sizeof(zone_size));
-
-   /*
-* The memory size has already been determined.  If we need
-* to do anything fancy with the allocation of this memory
-* to the zones, now is the time to do it.
-*/
-   zone_size[0] = max_low - min;
-
-   /*
-* Calculate the size of the holes.
-*  holes = node_size - sum(bank_sizes)
-*/
-   memcpy(zhole_size, zone_size, sizeof(zhole_size));
-   for_each_memblock(memory, reg) {
-   unsigned long start = memblock_region_memory_base_pfn(reg);
-   unsigned long end = memblock_region_memory_end_pfn(reg);
-
-   if (start < max_low) {
-   unsigned long low_end = min(end, max_low);
-   zhole_size[0] -= low_end - start;
-   }
-   }
+   max_zone_pfn[ZONE_DMA] = max_low;
+   max_zone_pfn[ZONE_NORMAL] = max_low;
 
/*
 * Adjust the sizes according to any special requirements for
 * this machine type.
+* This might lower ZONE_DMA limit.
 */
-   arch_adjust_zones(zone_size, zhole_size);
+   arch_adjust_zones(max_zone_pfn);
 
-   free_area_init_node(0, zone_size, min, zhole_size);
+   free_area_init(max_zone_pfn);
 }
 
 int pfn_valid(unsigned long pfn)
@@ -176,11 +151,11 @@ void __init bootmem_init(void)
sparse_init();
 
/*
-* Now free the memory - free_area_init_node needs
+* Now free the memory - free_area_init needs
 * the sparse mem_map arrays initialized by sparse_init()
 * for memmap_init_zone(), otherwise all PFNs are invalid.
   

[PATCH 12/21] sparc32: simplify detection of memory zone boundaries

2020-04-12 Thread Mike Rapoport
From: Mike Rapoport 

The free_area_init() function only requires the definition of maximal PFN
for each of the supported zones rather than calculation of actual zone sizes
and the sizes of the holes between the zones.

After removal of CONFIG_HAVE_MEMBLOCK_NODE_MAP the free_area_init() is
available to all architectures.

Using this function instead of free_area_init_node() simplifies the zone
detection.

Signed-off-by: Mike Rapoport 
---
 arch/sparc/mm/srmmu.c | 21 +
 1 file changed, 5 insertions(+), 16 deletions(-)

diff --git a/arch/sparc/mm/srmmu.c b/arch/sparc/mm/srmmu.c
index b7c94de70cca..cc071dd7d8da 100644
--- a/arch/sparc/mm/srmmu.c
+++ b/arch/sparc/mm/srmmu.c
@@ -1008,24 +1008,13 @@ void __init srmmu_paging_init(void)
kmap_init();
 
{
-   unsigned long zones_size[MAX_NR_ZONES];
-   unsigned long zholes_size[MAX_NR_ZONES];
-   unsigned long npages;
-   int znum;
+   unsigned long max_zone_pfn[MAX_NR_ZONES] = { 0 };
 
-   for (znum = 0; znum < MAX_NR_ZONES; znum++)
-   zones_size[znum] = zholes_size[znum] = 0;
+   max_zone_pfn[ZONE_DMA] = max_low_pfn;
+   max_zone_pfn[ZONE_NORMAL] = max_low_pfn;
+   max_zone_pfn[ZONE_HIGHMEM] = highend_pfn;
 
-   npages = max_low_pfn - pfn_base;
-
-   zones_size[ZONE_DMA] = npages;
-   zholes_size[ZONE_DMA] = npages - pages_avail;
-
-   npages = highend_pfn - max_low_pfn;
-   zones_size[ZONE_HIGHMEM] = npages;
-   zholes_size[ZONE_HIGHMEM] = npages - calc_highpages();
-
-   free_area_init_node(0, zones_size, pfn_base, zholes_size);
+   free_area_init(max_zone_pfn);
}
 }
 
-- 
2.25.1



[PATCH 11/21] parisc: simplify detection of memory zone boundaries

2020-04-12 Thread Mike Rapoport
From: Mike Rapoport 

The free_area_init() function only requires the definition of maximal PFN
for each of the supported zones rather than calculation of actual zone sizes
and the sizes of the holes between the zones.

After removal of CONFIG_HAVE_MEMBLOCK_NODE_MAP the free_area_init() is
available to all architectures.

Using this function instead of free_area_init_node() simplifies the zone
detection.

Signed-off-by: Mike Rapoport 
---
 arch/parisc/mm/init.c | 22 +++---
 1 file changed, 3 insertions(+), 19 deletions(-)

diff --git a/arch/parisc/mm/init.c b/arch/parisc/mm/init.c
index 5224fb38d766..02d2fdb85dcc 100644
--- a/arch/parisc/mm/init.c
+++ b/arch/parisc/mm/init.c
@@ -675,27 +675,11 @@ static void __init gateway_init(void)
 
 static void __init parisc_bootmem_free(void)
 {
-   unsigned long zones_size[MAX_NR_ZONES] = { 0, };
-   unsigned long holes_size[MAX_NR_ZONES] = { 0, };
-   unsigned long mem_start_pfn = ~0UL, mem_end_pfn = 0, mem_size_pfn = 0;
-   int i;
-
-   for (i = 0; i < npmem_ranges; i++) {
-   unsigned long start = pmem_ranges[i].start_pfn;
-   unsigned long size = pmem_ranges[i].pages;
-   unsigned long end = start + size;
-
-   if (mem_start_pfn > start)
-   mem_start_pfn = start;
-   if (mem_end_pfn < end)
-   mem_end_pfn = end;
-   mem_size_pfn += size;
-   }
+   unsigned long max_zone_pfn[MAX_NR_ZONES] = { 0, };
 
-   zones_size[0] = mem_end_pfn - mem_start_pfn;
-   holes_size[0] = zones_size[0] - mem_size_pfn;
+   max_zone_pfn[0] = memblock_end_of_DRAM();
 
-   free_area_init_node(0, zones_size, mem_start_pfn, holes_size);
+   free_area_init(max_zone_pfn);
 }
 
 void __init paging_init(void)
-- 
2.25.1



[PATCH 10/21] m68k: mm: simplify detection of memory zone boundaries

2020-04-12 Thread Mike Rapoport
From: Mike Rapoport 

The free_area_init() function only requires the definition of maximal PFN
for each of the supported zones rather than calculation of actual zone sizes
and the sizes of the holes between the zones.

After removal of CONFIG_HAVE_MEMBLOCK_NODE_MAP the free_area_init() is
available to all architectures.

Using this function instead of free_area_init_node() simplifies the zone
detection.

Signed-off-by: Mike Rapoport 
---
 arch/m68k/mm/motorola.c | 11 +--
 arch/m68k/mm/sun3mmu.c  | 10 +++---
 2 files changed, 8 insertions(+), 13 deletions(-)

diff --git a/arch/m68k/mm/motorola.c b/arch/m68k/mm/motorola.c
index 84ab5963cabb..904c2a663977 100644
--- a/arch/m68k/mm/motorola.c
+++ b/arch/m68k/mm/motorola.c
@@ -365,7 +365,7 @@ static void __init map_node(int node)
  */
 void __init paging_init(void)
 {
-   unsigned long zones_size[MAX_NR_ZONES] = { 0, };
+   unsigned long max_zone_pfn[MAX_NR_ZONES] = { 0, };
unsigned long min_addr, max_addr;
unsigned long addr;
int i;
@@ -448,11 +448,10 @@ void __init paging_init(void)
 #ifdef DEBUG
printk ("before free_area_init\n");
 #endif
-   for (i = 0; i < m68k_num_memory; i++) {
-   zones_size[ZONE_DMA] = m68k_memory[i].size >> PAGE_SHIFT;
-   free_area_init_node(i, zones_size,
-   m68k_memory[i].addr >> PAGE_SHIFT, NULL);
+   for (i = 0; i < m68k_num_memory; i++)
if (node_present_pages(i))
node_set_state(i, N_NORMAL_MEMORY);
-   }
+
+   max_zone_pfn[ZONE_DMA] = memblock_end_of_DRAM();
+   free_area_init(max_zone_pfn);
 }
diff --git a/arch/m68k/mm/sun3mmu.c b/arch/m68k/mm/sun3mmu.c
index eca1c46bb90a..5d8d956d9329 100644
--- a/arch/m68k/mm/sun3mmu.c
+++ b/arch/m68k/mm/sun3mmu.c
@@ -42,7 +42,7 @@ void __init paging_init(void)
unsigned long address;
unsigned long next_pgtable;
unsigned long bootmem_end;
-   unsigned long zones_size[MAX_NR_ZONES] = { 0, };
+   unsigned long max_zone_pfn[MAX_NR_ZONES] = { 0, };
unsigned long size;
 
empty_zero_page = memblock_alloc(PAGE_SIZE, PAGE_SIZE);
@@ -89,14 +89,10 @@ void __init paging_init(void)
current->mm = NULL;
 
/* memory sizing is a hack stolen from motorola.c..  hope it works for 
us */
-   zones_size[ZONE_DMA] = ((unsigned long)high_memory - PAGE_OFFSET) >> 
PAGE_SHIFT;
+   max_zone_pfn[ZONE_DMA] = ((unsigned long)high_memory) >> PAGE_SHIFT;
 
/* I really wish I knew why the following change made things better...  
-- Sam */
-/* free_area_init(zones_size); */
-   free_area_init_node(0, zones_size,
-   (__pa(PAGE_OFFSET) >> PAGE_SHIFT) + 1, NULL);
+   free_area_init(max_zone_pfn);
 
 
 }
-
-
-- 
2.25.1



[PATCH 09/21] csky: simplify detection of memory zone boundaries

2020-04-12 Thread Mike Rapoport
From: Mike Rapoport 

The free_area_init() function only requires the definition of maximal PFN
for each of the supported zones rather than calculation of actual zone sizes
and the sizes of the holes between the zones.

After removal of CONFIG_HAVE_MEMBLOCK_NODE_MAP the free_area_init() is
available to all architectures.

Using this function instead of free_area_init_node() simplifies the zone
detection.

Signed-off-by: Mike Rapoport 
---
 arch/csky/kernel/setup.c | 26 +++---
 1 file changed, 11 insertions(+), 15 deletions(-)

diff --git a/arch/csky/kernel/setup.c b/arch/csky/kernel/setup.c
index 819a9a7bf786..0481f4e34538 100644
--- a/arch/csky/kernel/setup.c
+++ b/arch/csky/kernel/setup.c
@@ -26,7 +26,9 @@ struct screen_info screen_info = {
 
 static void __init csky_memblock_init(void)
 {
-   unsigned long zone_size[MAX_NR_ZONES];
+   unsigned long lowmem_size = PFN_DOWN(LOWMEM_LIMIT - PHYS_OFFSET_OFFSET);
+   unsigned long sseg_size = PFN_DOWN(SSEG_SIZE - PHYS_OFFSET_OFFSET);
+   unsigned long max_zone_pfn[MAX_NR_ZONES] = { 0 };
signed long size;
 
memblock_reserve(__pa(_stext), _end - _stext);
@@ -36,28 +38,22 @@ static void __init csky_memblock_init(void)
 
memblock_dump_all();
 
-   memset(zone_size, 0, sizeof(zone_size));
-
min_low_pfn = PFN_UP(memblock_start_of_DRAM());
max_low_pfn = max_pfn = PFN_DOWN(memblock_end_of_DRAM());
 
size = max_pfn - min_low_pfn;
 
-   if (size <= PFN_DOWN(SSEG_SIZE - PHYS_OFFSET_OFFSET))
-   zone_size[ZONE_NORMAL] = size;
-   else if (size < PFN_DOWN(LOWMEM_LIMIT - PHYS_OFFSET_OFFSET)) {
-   zone_size[ZONE_NORMAL] =
-   PFN_DOWN(SSEG_SIZE - PHYS_OFFSET_OFFSET);
-   max_low_pfn = min_low_pfn + zone_size[ZONE_NORMAL];
-   } else {
-   zone_size[ZONE_NORMAL] =
-   PFN_DOWN(LOWMEM_LIMIT - PHYS_OFFSET_OFFSET);
-   max_low_pfn = min_low_pfn + zone_size[ZONE_NORMAL];
+   if (size >= lowmem_size) {
+   max_low_pfn = min_low_pfn + lowmem_size;
write_mmu_msa1(read_mmu_msa0() + SSEG_SIZE);
+   } else if (size > sseg_size) {
+   max_low_pfn = min_low_pfn + sseg_size;
}
 
+   max_zone_pfn[ZONE_NORMAL] = max_low_pfn;
+
 #ifdef CONFIG_HIGHMEM
-   zone_size[ZONE_HIGHMEM] = max_pfn - max_low_pfn;
+   max_zone_pfn[ZONE_HIGHMEM] = max_pfn;
 
highstart_pfn = max_low_pfn;
highend_pfn   = max_pfn;
@@ -66,7 +62,7 @@ static void __init csky_memblock_init(void)
 
dma_contiguous_reserve(0);
 
-   free_area_init_node(0, zone_size, min_low_pfn, NULL);
+   free_area_init(max_zone_pfn);
 }
 
 void __init setup_arch(char **cmdline_p)
-- 
2.25.1



[PATCH 08/21] arm64: simplify detection of memory zone boundaries for UMA configs

2020-04-12 Thread Mike Rapoport
From: Mike Rapoport 

The free_area_init() function only requires the definition of maximal PFN
for each of the supported zones rather than calculation of actual zone sizes
and the sizes of the holes between the zones.

After removal of CONFIG_HAVE_MEMBLOCK_NODE_MAP the free_area_init() is
available to all architectures.

Using this function instead of free_area_init_node() simplifies the zone
detection.

Signed-off-by: Mike Rapoport 
---
 arch/arm64/mm/init.c | 54 
 1 file changed, 54 deletions(-)

diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c
index a650adb358ee..d54ad2250dce 100644
--- a/arch/arm64/mm/init.c
+++ b/arch/arm64/mm/init.c
@@ -192,8 +192,6 @@ static phys_addr_t __init max_zone_phys(unsigned int 
zone_bits)
return min(offset + (1ULL << zone_bits), memblock_end_of_DRAM());
 }
 
-#ifdef CONFIG_NUMA
-
 static void __init zone_sizes_init(unsigned long min, unsigned long max)
 {
unsigned long max_zone_pfns[MAX_NR_ZONES]  = {0};
@@ -209,58 +207,6 @@ static void __init zone_sizes_init(unsigned long min, 
unsigned long max)
free_area_init(max_zone_pfns);
 }
 
-#else
-
-static void __init zone_sizes_init(unsigned long min, unsigned long max)
-{
-   struct memblock_region *reg;
-   unsigned long zone_size[MAX_NR_ZONES], zhole_size[MAX_NR_ZONES];
-   unsigned long __maybe_unused max_dma, max_dma32;
-
-   memset(zone_size, 0, sizeof(zone_size));
-
-   max_dma = max_dma32 = min;
-#ifdef CONFIG_ZONE_DMA
-   max_dma = max_dma32 = PFN_DOWN(arm64_dma_phys_limit);
-   zone_size[ZONE_DMA] = max_dma - min;
-#endif
-#ifdef CONFIG_ZONE_DMA32
-   max_dma32 = PFN_DOWN(arm64_dma32_phys_limit);
-   zone_size[ZONE_DMA32] = max_dma32 - max_dma;
-#endif
-   zone_size[ZONE_NORMAL] = max - max_dma32;
-
-   memcpy(zhole_size, zone_size, sizeof(zhole_size));
-
-   for_each_memblock(memory, reg) {
-   unsigned long start = memblock_region_memory_base_pfn(reg);
-   unsigned long end = memblock_region_memory_end_pfn(reg);
-
-#ifdef CONFIG_ZONE_DMA
-   if (start >= min && start < max_dma) {
-   unsigned long dma_end = min(end, max_dma);
-   zhole_size[ZONE_DMA] -= dma_end - start;
-   start = dma_end;
-   }
-#endif
-#ifdef CONFIG_ZONE_DMA32
-   if (start >= max_dma && start < max_dma32) {
-   unsigned long dma32_end = min(end, max_dma32);
-   zhole_size[ZONE_DMA32] -= dma32_end - start;
-   start = dma32_end;
-   }
-#endif
-   if (start >= max_dma32 && start < max) {
-   unsigned long normal_end = min(end, max);
-   zhole_size[ZONE_NORMAL] -= normal_end - start;
-   }
-   }
-
-   free_area_init_node(0, zone_size, min, zhole_size);
-}
-
-#endif /* CONFIG_NUMA */
-
 int pfn_valid(unsigned long pfn)
 {
phys_addr_t addr = pfn << PAGE_SHIFT;
-- 
2.25.1



[PATCH 07/21] arm: simplify detection of memory zone boundaries

2020-04-12 Thread Mike Rapoport
From: Mike Rapoport 

The free_area_init() function only requires the definition of maximal PFN
for each of the supported zones rather than calculation of actual zone sizes
and the sizes of the holes between the zones.

After removal of CONFIG_HAVE_MEMBLOCK_NODE_MAP the free_area_init() is
available to all architectures.

Using this function instead of free_area_init_node() simplifies the zone
detection.

Signed-off-by: Mike Rapoport 
---
 arch/arm/mm/init.c | 66 +-
 1 file changed, 7 insertions(+), 59 deletions(-)

diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c
index 054be44d1cdb..4e43455fab84 100644
--- a/arch/arm/mm/init.c
+++ b/arch/arm/mm/init.c
@@ -92,18 +92,6 @@ EXPORT_SYMBOL(arm_dma_zone_size);
  */
 phys_addr_t arm_dma_limit;
 unsigned long arm_dma_pfn_limit;
-
-static void __init arm_adjust_dma_zone(unsigned long *size, unsigned long 
*hole,
-   unsigned long dma_size)
-{
-   if (size[0] <= dma_size)
-   return;
-
-   size[ZONE_NORMAL] = size[0] - dma_size;
-   size[ZONE_DMA] = dma_size;
-   hole[ZONE_NORMAL] = hole[0];
-   hole[ZONE_DMA] = 0;
-}
 #endif
 
 void __init setup_dma_zone(const struct machine_desc *mdesc)
@@ -121,56 +109,16 @@ void __init setup_dma_zone(const struct machine_desc 
*mdesc)
 static void __init zone_sizes_init(unsigned long min, unsigned long max_low,
unsigned long max_high)
 {
-   unsigned long zone_size[MAX_NR_ZONES], zhole_size[MAX_NR_ZONES];
-   struct memblock_region *reg;
-
-   /*
-* initialise the zones.
-*/
-   memset(zone_size, 0, sizeof(zone_size));
+   unsigned long max_zone_pfn[MAX_NR_ZONES] = { 0 };
 
-   /*
-* The memory size has already been determined.  If we need
-* to do anything fancy with the allocation of this memory
-* to the zones, now is the time to do it.
-*/
-   zone_size[0] = max_low - min;
-#ifdef CONFIG_HIGHMEM
-   zone_size[ZONE_HIGHMEM] = max_high - max_low;
+#ifdef CONFIG_ZONE_DMA
+   max_zone_pfn[ZONE_DMA] = min(arm_dma_pfn_limit, max_low);
 #endif
-
-   /*
-* Calculate the size of the holes.
-*  holes = node_size - sum(bank_sizes)
-*/
-   memcpy(zhole_size, zone_size, sizeof(zhole_size));
-   for_each_memblock(memory, reg) {
-   unsigned long start = memblock_region_memory_base_pfn(reg);
-   unsigned long end = memblock_region_memory_end_pfn(reg);
-
-   if (start < max_low) {
-   unsigned long low_end = min(end, max_low);
-   zhole_size[0] -= low_end - start;
-   }
+   max_zone_pfn[ZONE_NORMAL] = max_low;
 #ifdef CONFIG_HIGHMEM
-   if (end > max_low) {
-   unsigned long high_start = max(start, max_low);
-   zhole_size[ZONE_HIGHMEM] -= end - high_start;
-   }
+   max_zone_pfn[ZONE_HIGHMEM] = max_high;
 #endif
-   }
-
-#ifdef CONFIG_ZONE_DMA
-   /*
-* Adjust the sizes according to any special requirements for
-* this machine type.
-*/
-   if (arm_dma_zone_size)
-   arm_adjust_dma_zone(zone_size, zhole_size,
-   arm_dma_zone_size >> PAGE_SHIFT);
-#endif
-
-   free_area_init_node(0, zone_size, min, zhole_size);
+   free_area_init(max_zone_pfn);
 }
 
 #ifdef CONFIG_HAVE_ARCH_PFN_VALID
@@ -306,7 +254,7 @@ void __init bootmem_init(void)
sparse_init();
 
/*
-* Now free the memory - free_area_init_node needs
+* Now free the memory - free_area_init needs
 * the sparse mem_map arrays initialized by sparse_init()
 * for memmap_init_zone(), otherwise all PFNs are invalid.
 */
-- 
2.25.1



[PATCH 06/21] alpha: simplify detection of memory zone boundaries

2020-04-12 Thread Mike Rapoport
From: Mike Rapoport 

The free_area_init() function only requires the definition of maximal PFN
for each of the supported zones rather than calculation of actual zone sizes
and the sizes of the holes between the zones.

After removal of CONFIG_HAVE_MEMBLOCK_NODE_MAP the free_area_init() is
available to all architectures.

Using this function instead of free_area_init_node() simplifies the zone
detection.

Signed-off-by: Mike Rapoport 
---
 arch/alpha/mm/numa.c | 18 --
 1 file changed, 4 insertions(+), 14 deletions(-)

diff --git a/arch/alpha/mm/numa.c b/arch/alpha/mm/numa.c
index a24cd13e71cb..5ad6087de1d6 100644
--- a/arch/alpha/mm/numa.c
+++ b/arch/alpha/mm/numa.c
@@ -202,8 +202,7 @@ setup_memory(void *kernel_end)
 
 void __init paging_init(void)
 {
-   unsigned intnid;
-   unsigned long   zones_size[MAX_NR_ZONES] = {0, };
+   unsigned long   max_zone_pfn[MAX_NR_ZONES] = {0, };
unsigned long   dma_local_pfn;
 
/*
@@ -215,19 +214,10 @@ void __init paging_init(void)
 */
dma_local_pfn = virt_to_phys((char *)MAX_DMA_ADDRESS) >> PAGE_SHIFT;
 
-   for_each_online_node(nid) {
-   unsigned long start_pfn = NODE_DATA(nid)->node_start_pfn;
-   unsigned long end_pfn = start_pfn + 
NODE_DATA(nid)->node_present_pages;
+   max_zone_pfn[ZONE_DMA] = dma_local_pfn;
+   max_zone_pfn[ZONE_NORMAL] = max_pfn;
 
-   if (dma_local_pfn >= end_pfn - start_pfn)
-   zones_size[ZONE_DMA] = end_pfn - start_pfn;
-   else {
-   zones_size[ZONE_DMA] = dma_local_pfn;
-   zones_size[ZONE_NORMAL] = (end_pfn - start_pfn) - 
dma_local_pfn;
-   }
-   node_set_state(nid, N_NORMAL_MEMORY);
-   free_area_init_node(nid, zones_size, start_pfn, NULL);
-   }
+   free_area_init(max_zone_pfn);
 
/* Initialize the kernel's ZERO_PGE. */
memset((void *)ZERO_PGE, 0, PAGE_SIZE);
-- 
2.25.1



[PATCH 05/21] mm: use free_area_init() instead of free_area_init_nodes()

2020-04-12 Thread Mike Rapoport
From: Mike Rapoport 

The free_area_init() function has effectively become a wrapper for
free_area_init_nodes() and there is no point in keeping it. Still, the
free_area_init() name is shorter and more general, as it does not imply
the necessity to initialize multiple nodes.

Rename free_area_init_nodes() to free_area_init(), update the callers and
drop old version of free_area_init().

Signed-off-by: Mike Rapoport 
---
 arch/arm64/mm/init.c |  2 +-
 arch/ia64/mm/contig.c|  2 +-
 arch/ia64/mm/discontig.c |  2 +-
 arch/microblaze/mm/init.c|  2 +-
 arch/mips/loongson64/numa.c  |  2 +-
 arch/mips/mm/init.c  |  2 +-
 arch/mips/sgi-ip27/ip27-memory.c |  2 +-
 arch/powerpc/mm/mem.c|  2 +-
 arch/riscv/mm/init.c |  2 +-
 arch/s390/mm/init.c  |  2 +-
 arch/sh/mm/init.c|  2 +-
 arch/sparc/mm/init_64.c  |  2 +-
 arch/x86/mm/init.c   |  2 +-
 include/linux/mm.h   |  7 +++
 mm/page_alloc.c  | 10 ++
 15 files changed, 18 insertions(+), 25 deletions(-)

diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c
index e42727e3568e..a650adb358ee 100644
--- a/arch/arm64/mm/init.c
+++ b/arch/arm64/mm/init.c
@@ -206,7 +206,7 @@ static void __init zone_sizes_init(unsigned long min, 
unsigned long max)
 #endif
max_zone_pfns[ZONE_NORMAL] = max;
 
-   free_area_init_nodes(max_zone_pfns);
+   free_area_init(max_zone_pfns);
 }
 
 #else
diff --git a/arch/ia64/mm/contig.c b/arch/ia64/mm/contig.c
index 5b00dc3898e1..8786fa5c7612 100644
--- a/arch/ia64/mm/contig.c
+++ b/arch/ia64/mm/contig.c
@@ -210,6 +210,6 @@ paging_init (void)
printk("Virtual mem_map starts at 0x%p\n", mem_map);
}
 #endif /* !CONFIG_VIRTUAL_MEM_MAP */
-   free_area_init_nodes(max_zone_pfns);
+   free_area_init(max_zone_pfns);
zero_page_memmap_ptr = virt_to_page(ia64_imva(empty_zero_page));
 }
diff --git a/arch/ia64/mm/discontig.c b/arch/ia64/mm/discontig.c
index 4f33f6e7e206..dd8284bcbf16 100644
--- a/arch/ia64/mm/discontig.c
+++ b/arch/ia64/mm/discontig.c
@@ -627,7 +627,7 @@ void __init paging_init(void)
max_zone_pfns[ZONE_DMA32] = max_dma;
 #endif
max_zone_pfns[ZONE_NORMAL] = max_pfn;
-   free_area_init_nodes(max_zone_pfns);
+   free_area_init(max_zone_pfns);
 
zero_page_memmap_ptr = virt_to_page(ia64_imva(empty_zero_page));
 }
diff --git a/arch/microblaze/mm/init.c b/arch/microblaze/mm/init.c
index 1ffbfa96b9b8..dcaa53d11339 100644
--- a/arch/microblaze/mm/init.c
+++ b/arch/microblaze/mm/init.c
@@ -112,7 +112,7 @@ static void __init paging_init(void)
 #endif
 
/* We don't have holes in memory map */
-   free_area_init_nodes(zones_size);
+   free_area_init(zones_size);
 }
 
 void __init setup_memory(void)
diff --git a/arch/mips/loongson64/numa.c b/arch/mips/loongson64/numa.c
index 1ae072df4831..901f5be5ee76 100644
--- a/arch/mips/loongson64/numa.c
+++ b/arch/mips/loongson64/numa.c
@@ -247,7 +247,7 @@ void __init paging_init(void)
zones_size[ZONE_DMA32] = MAX_DMA32_PFN;
 #endif
zones_size[ZONE_NORMAL] = max_low_pfn;
-   free_area_init_nodes(zones_size);
+   free_area_init(zones_size);
 }
 
 void __init mem_init(void)
diff --git a/arch/mips/mm/init.c b/arch/mips/mm/init.c
index 79684000de0e..19719e8b41a5 100644
--- a/arch/mips/mm/init.c
+++ b/arch/mips/mm/init.c
@@ -418,7 +418,7 @@ void __init paging_init(void)
}
 #endif
 
-   free_area_init_nodes(max_zone_pfns);
+   free_area_init(max_zone_pfns);
 }
 
 #ifdef CONFIG_64BIT
diff --git a/arch/mips/sgi-ip27/ip27-memory.c b/arch/mips/sgi-ip27/ip27-memory.c
index a45691e6ab90..1213215ea965 100644
--- a/arch/mips/sgi-ip27/ip27-memory.c
+++ b/arch/mips/sgi-ip27/ip27-memory.c
@@ -419,7 +419,7 @@ void __init paging_init(void)
 
pagetable_init();
zones_size[ZONE_NORMAL] = max_low_pfn;
-   free_area_init_nodes(zones_size);
+   free_area_init(zones_size);
 }
 
 void __init mem_init(void)
diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c
index 041ed7cfd341..0fcea21f26b4 100644
--- a/arch/powerpc/mm/mem.c
+++ b/arch/powerpc/mm/mem.c
@@ -271,7 +271,7 @@ void __init paging_init(void)
max_zone_pfns[ZONE_HIGHMEM] = max_pfn;
 #endif
 
-   free_area_init_nodes(max_zone_pfns);
+   free_area_init(max_zone_pfns);
 
mark_nonram_nosave();
 }
diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c
index b55be44ff9bd..f2ceab77b8e6 100644
--- a/arch/riscv/mm/init.c
+++ b/arch/riscv/mm/init.c
@@ -39,7 +39,7 @@ static void __init zone_sizes_init(void)
 #endif
max_zone_pfns[ZONE_NORMAL] = max_low_pfn;
 
-   free_area_init_nodes(max_zone_pfns);
+   free_area_init(max_zone_pfns);
 }
 
 static void setup_zero_page(void)
diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c
index 87b2d024e75a..b11bcf4da531 100644
--- a/arch/s390/mm/init.c
+++ b/arch/s390/mm/init.c
@@ -122,7 +122,7 @@ void 

[PATCH 04/21] mm: free_area_init: use maximal zone PFNs rather than zone sizes

2020-04-12 Thread Mike Rapoport
From: Mike Rapoport 

Currently, architectures that use free_area_init() to initialize memory map
and node and zone structures need to calculate zone and hole sizes. We can
use free_area_init_nodes() instead and let it detect the zone boundaries
while the architectures will only have to supply the possible limits for
the zones.

Signed-off-by: Mike Rapoport 
---
 arch/alpha/mm/init.c| 16 ++--
 arch/c6x/mm/init.c  |  8 +++-
 arch/h8300/mm/init.c|  6 +++---
 arch/hexagon/mm/init.c  |  6 +++---
 arch/m68k/mm/init.c |  6 +++---
 arch/m68k/mm/mcfmmu.c   |  9 +++--
 arch/nds32/mm/init.c| 11 ---
 arch/nios2/mm/init.c|  8 +++-
 arch/openrisc/mm/init.c |  9 +++--
 arch/um/kernel/mem.c| 12 
 include/linux/mm.h  |  2 +-
 mm/page_alloc.c |  5 ++---
 12 files changed, 38 insertions(+), 60 deletions(-)

diff --git a/arch/alpha/mm/init.c b/arch/alpha/mm/init.c
index 12e218d3792a..667cd21393b5 100644
--- a/arch/alpha/mm/init.c
+++ b/arch/alpha/mm/init.c
@@ -243,21 +243,17 @@ callback_init(void * kernel_end)
  */
 void __init paging_init(void)
 {
-   unsigned long zones_size[MAX_NR_ZONES] = {0, };
-   unsigned long dma_pfn, high_pfn;
+   unsigned long max_zone_pfn[MAX_NR_ZONES] = {0, };
+   unsigned long dma_pfn;
 
dma_pfn = virt_to_phys((char *)MAX_DMA_ADDRESS) >> PAGE_SHIFT;
-   high_pfn = max_pfn = max_low_pfn;
+   max_pfn = max_low_pfn;
 
-   if (dma_pfn >= high_pfn)
-   zones_size[ZONE_DMA] = high_pfn;
-   else {
-   zones_size[ZONE_DMA] = dma_pfn;
-   zones_size[ZONE_NORMAL] = high_pfn - dma_pfn;
-   }
+   max_zone_pfn[ZONE_DMA] = dma_pfn;
+   max_zone_pfn[ZONE_NORMAL] = max_pfn;
 
/* Initialize mem_map[].  */
-   free_area_init(zones_size);
+   free_area_init(max_zone_pfn);
 
/* Initialize the kernel's ZERO_PGE. */
memset((void *)ZERO_PGE, 0, PAGE_SIZE);
diff --git a/arch/c6x/mm/init.c b/arch/c6x/mm/init.c
index 9b374393a8f4..a97e51a3e26d 100644
--- a/arch/c6x/mm/init.c
+++ b/arch/c6x/mm/init.c
@@ -33,7 +33,7 @@ EXPORT_SYMBOL(empty_zero_page);
 void __init paging_init(void)
 {
struct pglist_data *pgdat = NODE_DATA(0);
-   unsigned long zones_size[MAX_NR_ZONES] = {0, };
+   unsigned long max_zone_pfn[MAX_NR_ZONES] = {0, };
 
empty_zero_page  = (unsigned long) memblock_alloc(PAGE_SIZE,
  PAGE_SIZE);
@@ -49,11 +49,9 @@ void __init paging_init(void)
/*
 * Define zones
 */
-   zones_size[ZONE_NORMAL] = (memory_end - PAGE_OFFSET) >> PAGE_SHIFT;
-   pgdat->node_zones[ZONE_NORMAL].zone_start_pfn =
-   __pa(PAGE_OFFSET) >> PAGE_SHIFT;
+   max_zone_pfn[ZONE_NORMAL] = memory_end >> PAGE_SHIFT;
 
-   free_area_init(zones_size);
+   free_area_init(max_zone_pfn);
 }
 
 void __init mem_init(void)
diff --git a/arch/h8300/mm/init.c b/arch/h8300/mm/init.c
index 1eab16b1a0bc..27a0020e3771 100644
--- a/arch/h8300/mm/init.c
+++ b/arch/h8300/mm/init.c
@@ -83,10 +83,10 @@ void __init paging_init(void)
 start_mem, end_mem);
 
{
-   unsigned long zones_size[MAX_NR_ZONES] = {0, };
+   unsigned long max_zone_pfn[MAX_NR_ZONES] = {0, };
 
-   zones_size[ZONE_NORMAL] = (end_mem - PAGE_OFFSET) >> PAGE_SHIFT;
-   free_area_init(zones_size);
+   max_zone_pfn[ZONE_NORMAL] = end_mem >> PAGE_SHIFT;
+   free_area_init(max_zone_pfn);
}
 }
 
diff --git a/arch/hexagon/mm/init.c b/arch/hexagon/mm/init.c
index c961773a6fff..f2e6c868e477 100644
--- a/arch/hexagon/mm/init.c
+++ b/arch/hexagon/mm/init.c
@@ -91,7 +91,7 @@ void sync_icache_dcache(pte_t pte)
  */
 void __init paging_init(void)
 {
-   unsigned long zones_sizes[MAX_NR_ZONES] = {0, };
+   unsigned long max_zone_pfn[MAX_NR_ZONES] = {0, };
 
/*
 *  This is not particularly well documented anywhere, but
@@ -101,9 +101,9 @@ void __init paging_init(void)
 *  adjust accordingly.
 */
 
-   zones_sizes[ZONE_NORMAL] = max_low_pfn;
+   max_zone_pfn[ZONE_NORMAL] = max_low_pfn;
 
-   free_area_init(zones_sizes);  /*  sets up the zonelists and mem_map  */
+   free_area_init(max_zone_pfn);  /*  sets up the zonelists and mem_map  */
 
/*
 * Start of high memory area.  Will probably need something more
diff --git a/arch/m68k/mm/init.c b/arch/m68k/mm/init.c
index b88d510d4fe3..6d3147662ff2 100644
--- a/arch/m68k/mm/init.c
+++ b/arch/m68k/mm/init.c
@@ -84,7 +84,7 @@ void __init paging_init(void)
 * page_alloc get different views of the world.
 */
unsigned long end_mem = memory_end & PAGE_MASK;
-   unsigned long zones_size[MAX_NR_ZONES] = { 0, };
+   unsigned long max_zone_pfn[MAX_NR_ZONES] = { 0, };
 
high_memory = (void *) end_mem;
 
@@ -98,8 +98,8 @@ void 

[PATCH 03/21] mm: remove CONFIG_HAVE_MEMBLOCK_NODE_MAP option

2020-04-12 Thread Mike Rapoport
From: Mike Rapoport 

The CONFIG_HAVE_MEMBLOCK_NODE_MAP is used to differentiate initialization
of nodes and zones structures between the systems that have region to node
mapping in memblock and those that don't.

Currently all the NUMA architectures enable this option and for the
non-NUMA systems we can presume that all the memory belongs to node 0 and
therefore the compile time configuration option is not required.

The remaining few architectures that use DISCONTIGMEM without NUMA are
easily updated to use memblock_add_node() instead of memblock_add() and
thus have proper correspondence of memblock regions to NUMA nodes.

Still, free_area_init_node() must have a backward compatible version
because its semantics with and without CONFIG_HAVE_MEMBLOCK_NODE_MAP are
different. Once all the architectures use the new semantics, the
entire compatibility layer can be dropped.

To avoid addition of extra run time memory to store node id for
architectures that keep memblock but have only a single node, the node id
field of the memblock_region is guarded by CONFIG_NEED_MULTIPLE_NODES and
the corresponding accessors presume that in those cases it is always 0.

Signed-off-by: Mike Rapoport 
---
 .../vm/numa-memblock/arch-support.txt |  34 --
 arch/alpha/mm/numa.c  |   4 +-
 arch/arm64/Kconfig|   1 -
 arch/ia64/Kconfig |   1 -
 arch/m68k/mm/motorola.c   |   4 +-
 arch/microblaze/Kconfig   |   1 -
 arch/mips/Kconfig |   1 -
 arch/powerpc/Kconfig  |   1 -
 arch/riscv/Kconfig|   1 -
 arch/s390/Kconfig |   1 -
 arch/sh/Kconfig   |   1 -
 arch/sparc/Kconfig|   1 -
 arch/x86/Kconfig  |   1 -
 include/linux/memblock.h  |   8 +-
 include/linux/mm.h|  12 +--
 include/linux/mmzone.h|   2 +-
 mm/Kconfig|   3 -
 mm/memblock.c |  11 +-
 mm/memory_hotplug.c   |   4 -
 mm/page_alloc.c   | 101 ++
 20 files changed, 74 insertions(+), 119 deletions(-)
 delete mode 100644 Documentation/features/vm/numa-memblock/arch-support.txt

diff --git a/Documentation/features/vm/numa-memblock/arch-support.txt 
b/Documentation/features/vm/numa-memblock/arch-support.txt
deleted file mode 100644
index 3004beb0fd71..
--- a/Documentation/features/vm/numa-memblock/arch-support.txt
+++ /dev/null
@@ -1,34 +0,0 @@
-#
-# Feature name:  numa-memblock
-# Kconfig:   HAVE_MEMBLOCK_NODE_MAP
-# description:   arch supports NUMA aware memblocks
-#
----
-| arch |status|
----
-|   alpha: | TODO |
-| arc: |  ..  |
-| arm: |  ..  |
-|   arm64: |  ok  |
-| c6x: |  ..  |
-|csky: |  ..  |
-|   h8300: |  ..  |
-| hexagon: |  ..  |
-|ia64: |  ok  |
-|m68k: |  ..  |
-|  microblaze: |  ok  |
-|mips: |  ok  |
-|   nds32: | TODO |
-|   nios2: |  ..  |
-|openrisc: |  ..  |
-|  parisc: |  ..  |
-| powerpc: |  ok  |
-|   riscv: |  ok  |
-|s390: |  ok  |
-|  sh: |  ok  |
-|   sparc: |  ok  |
-|  um: |  ..  |
-|   unicore32: |  ..  |
-| x86: |  ok  |
-|  xtensa: |  ..  |
----
diff --git a/arch/alpha/mm/numa.c b/arch/alpha/mm/numa.c
index d0b73371e985..a24cd13e71cb 100644
--- a/arch/alpha/mm/numa.c
+++ b/arch/alpha/mm/numa.c
@@ -144,8 +144,8 @@ setup_memory_node(int nid, void *kernel_end)
if (!nid && (node_max_pfn < end_kernel_pfn || node_min_pfn > 
start_kernel_pfn))
panic("kernel loaded out of ram");
 
-   memblock_add(PFN_PHYS(node_min_pfn),
-(node_max_pfn - node_min_pfn) << PAGE_SHIFT);
+   memblock_add_node(PFN_PHYS(node_min_pfn),
+ (node_max_pfn - node_min_pfn) << PAGE_SHIFT, nid);
 
/* Zone start phys-addr must be 2^(MAX_ORDER-1) aligned.
   Note that we round this down, not up - node memory
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 40fb05d96c60..957151013d10 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -156,7 +156,6 @@ config ARM64
select HAVE_GCC_PLUGINS
select HAVE_HW_BREAKPOINT if PERF_EVENTS
select HAVE_IRQ_TIME_ACCOUNTING
-   select HAVE_MEMBLOCK_NODE_MAP if NUMA
select HAVE_NMI
select HAVE_PATA_PLATFORM
select HAVE_PERF_EVENTS
diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig
index bab7cd878464..88b05b5256a9 100644

[PATCH 02/21] mm: make early_pfn_to_nid() and related definitions close to each other

2020-04-12 Thread Mike Rapoport
From: Mike Rapoport 

The early_pfn_to_nid() and its helper __early_pfn_to_nid() are spread
around include/linux/mm.h, include/linux/mmzone.h and mm/page_alloc.c.

Drop unused stub for __early_pfn_to_nid() and move its actual generic
implementation close to its users.

Signed-off-by: Mike Rapoport 
---
 include/linux/mm.h |  4 ++--
 include/linux/mmzone.h |  9 
 mm/page_alloc.c| 51 +-
 3 files changed, 27 insertions(+), 37 deletions(-)

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 5a323422d783..a404026d14d4 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -2388,9 +2388,9 @@ extern void sparse_memory_present_with_active_regions(int 
nid);
 
 #if !defined(CONFIG_HAVE_MEMBLOCK_NODE_MAP) && \
 !defined(CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID)
-static inline int __early_pfn_to_nid(unsigned long pfn,
-   struct mminit_pfnnid_cache *state)
+static inline int early_pfn_to_nid(unsigned long pfn)
 {
+   BUILD_BUG_ON(IS_ENABLED(CONFIG_NUMA));
return 0;
 }
 #else
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 1b9de7d220fb..7b5b6eba402f 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -1078,15 +1078,6 @@ static inline struct zoneref 
*first_zones_zonelist(struct zonelist *zonelist,
 #include <asm/sparsemem.h>
 #endif
 
-#if !defined(CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID) && \
-   !defined(CONFIG_HAVE_MEMBLOCK_NODE_MAP)
-static inline unsigned long early_pfn_to_nid(unsigned long pfn)
-{
-   BUILD_BUG_ON(IS_ENABLED(CONFIG_NUMA));
-   return 0;
-}
-#endif
-
 #ifdef CONFIG_FLATMEM
 #define pfn_to_nid(pfn)(0)
 #endif
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 0d012eda1694..1ac775bfc9cf 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1504,6 +1504,31 @@ void __free_pages_core(struct page *page, unsigned int 
order)
 
 static struct mminit_pfnnid_cache early_pfnnid_cache __meminitdata;
 
+#ifndef CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID
+
+/*
+ * Required by SPARSEMEM. Given a PFN, return what node the PFN is on.
+ */
+int __meminit __early_pfn_to_nid(unsigned long pfn,
+   struct mminit_pfnnid_cache *state)
+{
+   unsigned long start_pfn, end_pfn;
+   int nid;
+
+   if (state->last_start <= pfn && pfn < state->last_end)
+   return state->last_nid;
+
+   nid = memblock_search_pfn_nid(pfn, &start_pfn, &end_pfn);
+   if (nid != NUMA_NO_NODE) {
+   state->last_start = start_pfn;
+   state->last_end = end_pfn;
+   state->last_nid = nid;
+   }
+
+   return nid;
+}
+#endif /* CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID */
+
 int __meminit early_pfn_to_nid(unsigned long pfn)
 {
static DEFINE_SPINLOCK(early_pfn_lock);
@@ -6298,32 +6323,6 @@ void __meminit init_currently_empty_zone(struct zone 
*zone,
zone->initialized = 1;
 }
 
-#ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
-#ifndef CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID
-
-/*
- * Required by SPARSEMEM. Given a PFN, return what node the PFN is on.
- */
-int __meminit __early_pfn_to_nid(unsigned long pfn,
-   struct mminit_pfnnid_cache *state)
-{
-   unsigned long start_pfn, end_pfn;
-   int nid;
-
-   if (state->last_start <= pfn && pfn < state->last_end)
-   return state->last_nid;
-
-   nid = memblock_search_pfn_nid(pfn, &start_pfn, &end_pfn);
-   if (nid != NUMA_NO_NODE) {
-   state->last_start = start_pfn;
-   state->last_end = end_pfn;
-   state->last_nid = nid;
-   }
-
-   return nid;
-}
-#endif /* CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID */
-
 /**
  * free_bootmem_with_active_regions - Call memblock_free_early_nid for each 
active range
  * @nid: The node to free memory on. If MAX_NUMNODES, all nodes are freed.
-- 
2.25.1



[PATCH 01/21] mm: memblock: replace dereferences of memblock_region.nid with API calls

2020-04-12 Thread Mike Rapoport
From: Mike Rapoport 

There are several places in the code that directly dereference
memblock_region.nid despite this field being defined only when
CONFIG_HAVE_MEMBLOCK_NODE_MAP=y.

Replace these with calls to memblock_get_region_nid() to improve code
robustness and to avoid possible breakage when
CONFIG_HAVE_MEMBLOCK_NODE_MAP will be removed.

Signed-off-by: Mike Rapoport 
---
 arch/arm64/mm/numa.c | 9 ++---
 arch/x86/mm/numa.c   | 6 --
 mm/memblock.c| 8 +---
 mm/page_alloc.c  | 4 ++--
 4 files changed, 17 insertions(+), 10 deletions(-)

diff --git a/arch/arm64/mm/numa.c b/arch/arm64/mm/numa.c
index 4decf1659700..aafcee3e3f7e 100644
--- a/arch/arm64/mm/numa.c
+++ b/arch/arm64/mm/numa.c
@@ -350,13 +350,16 @@ static int __init numa_register_nodes(void)
struct memblock_region *mblk;
 
/* Check that valid nid is set to memblks */
-   for_each_memblock(memory, mblk)
-   if (mblk->nid == NUMA_NO_NODE || mblk->nid >= MAX_NUMNODES) {
+   for_each_memblock(memory, mblk) {
+   int mblk_nid = memblock_get_region_node(mblk);
+
+   if (mblk_nid == NUMA_NO_NODE || mblk_nid >= MAX_NUMNODES) {
pr_warn("Warning: invalid memblk node %d [mem 
%#010Lx-%#010Lx]\n",
-   mblk->nid, mblk->base,
+   mblk_nid, mblk->base,
mblk->base + mblk->size - 1);
return -EINVAL;
}
+   }
 
/* Finally register nodes. */
for_each_node_mask(nid, numa_nodes_parsed) {
diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c
index 59ba008504dc..fe024b2ac796 100644
--- a/arch/x86/mm/numa.c
+++ b/arch/x86/mm/numa.c
@@ -517,8 +517,10 @@ static void __init numa_clear_kernel_node_hotplug(void)
 *   reserve specific pages for Sandy Bridge graphics. ]
 */
for_each_memblock(reserved, mb_region) {
-   if (mb_region->nid != MAX_NUMNODES)
-   node_set(mb_region->nid, reserved_nodemask);
+   int nid = memblock_get_region_node(mb_region);
+
+   if (nid != MAX_NUMNODES)
+   node_set(nid, reserved_nodemask);
}
 
/*
diff --git a/mm/memblock.c b/mm/memblock.c
index c79ba6f9920c..43e2fd3006c1 100644
--- a/mm/memblock.c
+++ b/mm/memblock.c
@@ -1207,13 +1207,15 @@ void __init_memblock __next_mem_pfn_range(int *idx, int 
nid,
 {
struct memblock_type *type = &memblock.memory;
struct memblock_region *r;
+   int r_nid;
 
while (++*idx < type->cnt) {
r = &type->regions[*idx];
+   r_nid = memblock_get_region_node(r);
 
if (PFN_UP(r->base) >= PFN_DOWN(r->base + r->size))
continue;
-   if (nid == MAX_NUMNODES || nid == r->nid)
+   if (nid == MAX_NUMNODES || nid == r_nid)
break;
}
if (*idx >= type->cnt) {
@@ -1226,7 +1228,7 @@ void __init_memblock __next_mem_pfn_range(int *idx, int 
nid,
if (out_end_pfn)
*out_end_pfn = PFN_DOWN(r->base + r->size);
if (out_nid)
-   *out_nid = r->nid;
+   *out_nid = r_nid;
 }
 
 /**
@@ -1810,7 +1812,7 @@ int __init_memblock memblock_search_pfn_nid(unsigned long 
pfn,
*start_pfn = PFN_DOWN(type->regions[mid].base);
*end_pfn = PFN_DOWN(type->regions[mid].base + type->regions[mid].size);
 
-   return type->regions[mid].nid;
+   return memblock_get_region_node(&type->regions[mid]);
 }
 #endif
 
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 69827d4fa052..0d012eda1694 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -7208,7 +7208,7 @@ static void __init find_zone_movable_pfns_for_nodes(void)
if (!memblock_is_hotpluggable(r))
continue;
 
-   nid = r->nid;
+   nid = memblock_get_region_node(r);
 
usable_startpfn = PFN_DOWN(r->base);
zone_movable_pfn[nid] = zone_movable_pfn[nid] ?
@@ -7229,7 +7229,7 @@ static void __init find_zone_movable_pfns_for_nodes(void)
if (memblock_is_mirror(r))
continue;
 
-   nid = r->nid;
+   nid = memblock_get_region_node(r);
 
usable_startpfn = memblock_region_memory_base_pfn(r);
 
-- 
2.25.1



[PATCH 00/21] mm: rework free_area_init*() functions

2020-04-12 Thread Mike Rapoport
From: Mike Rapoport 

Hi,

After the discussion [1] about removal of CONFIG_NODES_SPAN_OTHER_NODES and
CONFIG_HAVE_MEMBLOCK_NODE_MAP options, I took it a bit further and updated
the node/zone initialization. 

Since all architectures have memblock, it is possible to use only the newer
version of free_area_init_node() that calculates the zone and node
boundaries based on memblock node mapping and architectural limits on
possible zone PFNs. 

The architectures that still determined zone and hole sizes can be switched
to the generic code and the old code that took those zone and hole sizes
can be simply removed.

And, since it all started from the removal of
CONFIG_NODES_SPAN_OTHER_NODES, the memmap_init() is now updated to iterate
over memblocks and so it does not need to perform early_pfn_to_nid() query
for every PFN.

--
Sincerely yours,
Mike.

[1] 
https://lore.kernel.org/lkml/1585420282-25630-1-git-send-email-h...@os.amperecomputing.com

Baoquan He (1):
  mm: memmap_init: iterate over memblock regions rather than check each PFN

Mike Rapoport (20):
  mm: memblock: replace dereferences of memblock_region.nid with API calls
  mm: make early_pfn_to_nid() and related definitions close to each other
  mm: remove CONFIG_HAVE_MEMBLOCK_NODE_MAP option
  mm: free_area_init: use maximal zone PFNs rather than zone sizes
  mm: use free_area_init() instead of free_area_init_nodes()
  alpha: simplify detection of memory zone boundaries
  arm: simplify detection of memory zone boundaries
  arm64: simplify detection of memory zone boundaries for UMA configs
  csky: simplify detection of memory zone boundaries
  m68k: mm: simplify detection of memory zone boundaries
  parisc: simplify detection of memory zone boundaries
  sparc32: simplify detection of memory zone boundaries
  unicore32: simplify detection of memory zone boundaries
  xtensa: simplify detection of memory zone boundaries
  mm: remove early_pfn_in_nid() and CONFIG_NODES_SPAN_OTHER_NODES
  mm: free_area_init: allow defining max_zone_pfn in descending order
  mm: rename free_area_init_node() to free_area_init_memoryless_node()
  mm: clean up free_area_init_node() and its helpers
  mm: simplify find_min_pfn_with_active_regions()
  docs/vm: update memory-models documentation

 .../vm/numa-memblock/arch-support.txt |  34 ---
 Documentation/vm/memory-model.rst |   9 +-
 arch/alpha/mm/init.c  |  16 +-
 arch/alpha/mm/numa.c  |  22 +-
 arch/arc/mm/init.c|  36 +--
 arch/arm/mm/init.c|  66 +
 arch/arm64/Kconfig|   1 -
 arch/arm64/mm/init.c  |  56 +---
 arch/arm64/mm/numa.c  |   9 +-
 arch/c6x/mm/init.c|   8 +-
 arch/csky/kernel/setup.c  |  26 +-
 arch/h8300/mm/init.c  |   6 +-
 arch/hexagon/mm/init.c|   6 +-
 arch/ia64/Kconfig |   1 -
 arch/ia64/mm/contig.c |   2 +-
 arch/ia64/mm/discontig.c  |   2 +-
 arch/m68k/mm/init.c   |   6 +-
 arch/m68k/mm/mcfmmu.c |   9 +-
 arch/m68k/mm/motorola.c   |  15 +-
 arch/m68k/mm/sun3mmu.c|  10 +-
 arch/microblaze/Kconfig   |   1 -
 arch/microblaze/mm/init.c |   2 +-
 arch/mips/Kconfig |   1 -
 arch/mips/loongson64/numa.c   |   2 +-
 arch/mips/mm/init.c   |   2 +-
 arch/mips/sgi-ip27/ip27-memory.c  |   2 +-
 arch/nds32/mm/init.c  |  11 +-
 arch/nios2/mm/init.c  |   8 +-
 arch/openrisc/mm/init.c   |   9 +-
 arch/parisc/mm/init.c |  22 +-
 arch/powerpc/Kconfig  |  10 -
 arch/powerpc/mm/mem.c |   2 +-
 arch/riscv/Kconfig|   1 -
 arch/riscv/mm/init.c  |   2 +-
 arch/s390/Kconfig |   1 -
 arch/s390/mm/init.c   |   2 +-
 arch/sh/Kconfig   |   1 -
 arch/sh/mm/init.c |   2 +-
 arch/sparc/Kconfig|  10 -
 arch/sparc/mm/init_64.c   |   2 +-
 arch/sparc/mm/srmmu.c |  21 +-
 arch/um/kernel/mem.c  |  12 +-
 arch/unicore32/include/asm/memory.h   |   2 +-
 arch/unicore32/include/mach/memory.h  |   6 +-
 arch/unicore32/kernel/pci.c   |  14 +-
 arch/unicore32/mm/init.c  |  43 +--
 arch/x86/Kconfig  |  10 -
 arch/x86/mm/init.c|   2 +-
 arch/x86/mm/numa.c|  11 

Re: Boot flakiness with QEMU 3.1.0 and Clang built kernels

2020-04-12 Thread Cédric Le Goater
On 4/11/20 3:57 PM, Nicholas Piggin wrote:
> Nicholas Piggin's on April 11, 2020 7:32 pm:
>> Nathan Chancellor's on April 11, 2020 10:53 am:
>>> The tt.config values are needed to reproduce but I did not verify that
>>> ONLY tt.config was needed. Other than that, no, we are just building
>>> either pseries_defconfig or powernv_defconfig with those configs and
>>> letting it boot up with a simple initramfs, which prints the version
>>> string then shuts the machine down.
>>>
>>> Let me know if you need any more information, cheers!
>>
>> Okay I can reproduce it. Sometimes it eventually recovers after a long
>> pause, and some keyboard input often helps it along. So that seems like 
>> it might be a lost interrupt.
>>
>> POWER8 vs POWER9 might just be a timing thing if P9 is still hanging
>> sometimes. I wasn't able to reproduce it with defconfig+tt.config, I
>> needed your other config with various other debug options.
>>
>> Thanks for the very good report. I'll let you know what I find.
> 
> It looks like a qemu bug. Booting with '-d int' shows the decrementer 
> simply stops firing at the point of the hang, even though MSR[EE]=1 and 
> the DEC register is wrapping. Linux appears to be doing the right thing 
> as far as I can tell (not losing interrupts).
> 
> This qemu patch fixes the boot hang for me. I don't know that qemu 
> really has the right idea of "context synchronizing" as defined in the
> powerpc architecture -- mtmsrd L=1 is not context synchronizing but that
> does not mean it can avoid looking at exceptions until the next such
> event. It looks like the decrementer exception goes high but the
> execution of mtmsrd L=1 is ignoring it.
> 
> Prior to the Linux patch 3282a3da25b you bisected to, interrupt replay
> code would return with an 'rfi' instruction as part of interrupt return,
> which probably helped to get things moving along a bit. However it would
> not be foolproof, and Cedric did say he encountered some mysterious
> lockups under load with qemu powernv before that patch was merged, so
> maybe it's the same issue?

Nope :/ but this is a fix for an important problem reported by Anton in 
November. Attached is the test case.  

Thanks,

C. 


 
/*

Mikey and I noticed that the decrementer isn't firing when
it should. If a decrementer is pending and an mtmsrd(MSR_EE) is
executed then we should take the decrementer exception. From the PPC AS:

  If MSR EE = 0 and an External, Decrementer, or Per-
  formance Monitor exception is pending, executing
  an mtmsrd instruction that sets MSR EE to 1 will
  cause the interrupt to occur before the next instruc-
  tion is executed, if no higher priority exception
  exists

A test case is below. r31 is incremented for every decrementer
exception.

powerpc64le-linux-gcc -c test.S
powerpc64le-linux-ld -Ttext=0x0 -o test.elf test.o
powerpc64le-linux-objcopy -O binary test.elf test.bin

qemu-system-ppc64 -M powernv -cpu POWER9 -nographic -bios test.bin

"info registers" shows it looping in the lower loop, ie the
decrementer exception was never taken.

r31 never moves. If I build with:

powerpc64le-linux-gcc -DFIX_BROKEN -c test.S

I see r31 move.

*/

#include <ppc-asm.h>

/* Load an immediate 64-bit value into a register */
#define LOAD_IMM64(r, e)\
lis r,(e)@highest;  \
ori r,r,(e)@higher; \
rldicr  r,r, 32, 31;\
oris r,r, (e)@h; \
ori r,r, (e)@l;

#define FIXUP_ENDIAN   \
tdi   0,0,0x48;   /* Reverse endian of b . + 8  */ \
b 191f;   /* Skip trampoline if endian is good  */ \
.long 0xa600607d; /* mfmsr r11  */ \
.long 0x01006b69; /* xori r11,r11,1 */ \
.long 0x05009f42; /* bcl 20,31,$+4  */ \
.long 0xa602487d; /* mflr r10   */ \
.long 0x14004a39; /* addi r10,r10,20*/ \
.long 0xa64b5a7d; /* mthsrr0 r10*/ \
.long 0xa64b7b7d; /* mthsrr1 r11*/ \
.long 0x2402004c; /* hrfid  */ \
191:

.= 0x0
.globl _start
_start:
b   1f

.= 0x10
FIXUP_ENDIAN
b   1f

.= 0x100
1:
FIXUP_ENDIAN
b   __initialize

#define EXCEPTION(nr)   \
.= nr   ;\
b   .

/* More exception stubs */
EXCEPTION(0x300)
EXCEPTION(0x380)
EXCEPTION(0x400)
EXCEPTION(0x480)
EXCEPTION(0x500)
EXCEPTION(0x600)
EXCEPTION(0x700)
EXCEPTION(0x800)

.= 0x900
LOAD_IMM64(r0, 0x100)
mtdec   r0
addi    r31,r31,1
rfid

EXCEPTION(0x980)
EXCEPTION(0xa00)
EXCEPTION(0xb00)
EXCEPTION(0xc00)

Re: [RFC 1/3] Interface for an idle-stop dependency structure

2020-04-12 Thread Pratik Sampat

Hello Gautham

On 08/04/20 4:21 pm, Gautham R Shenoy wrote:

Hi Pratik,

On Wed, Mar 04, 2020 at 09:31:21PM +0530, Pratik Rajesh Sampat wrote:

Design patch to introduce the idea of having a dependency structure for
idle-stop. The structure encapsulates the following:
1. Bitmask for version of idle-stop
2. Bitmask for properties like ENABLE/DISABLE
3. Function pointer which helps handle how the stop must be invoked

The commit lays a foundation for other idle-stop versions to be added
and handled cleanly based on their specified requirements.
Currently it handles the existing "idle-stop" version by setting the
discovery bits and the function pointer.

So, if this patch is applied, and we are running with an OPAL that
doesn't publish the "idle-stop" dt-cpu-feature, then the goal is to
not enable any stop states. Is this correct ?


Yes, all states will be disabled with no power saving.


Signed-off-by: Pratik Rajesh Sampat 
---
  arch/powerpc/include/asm/processor.h  | 17 +
  arch/powerpc/kernel/dt_cpu_ftrs.c |  5 +
  arch/powerpc/platforms/powernv/idle.c | 17 +
  drivers/cpuidle/cpuidle-powernv.c |  3 ++-
  4 files changed, 37 insertions(+), 5 deletions(-)

diff --git a/arch/powerpc/include/asm/processor.h 
b/arch/powerpc/include/asm/processor.h
index eedcbfb9a6ff..da59f01a5c09 100644
--- a/arch/powerpc/include/asm/processor.h
+++ b/arch/powerpc/include/asm/processor.h
@@ -429,6 +429,23 @@ extern void power4_idle_nap(void);
  extern unsigned long cpuidle_disable;
  enum idle_boot_override {IDLE_NO_OVERRIDE = 0, IDLE_POWERSAVE_OFF};

+#define STOP_ENABLE0x0001
+
+#define STOP_VERSION_P9   0x1
+
+/*
+ * Classify the dependencies of the stop states
+ * @idle_stop: function handler to handle the quirk stop version
+ * @cpuidle_prop: Signify support for stop states through kernel and/or 
firmware
+ * @stop_version: Classify quirk versions for stop states
+ */
+typedef struct {
+   unsigned long (*idle_stop)(unsigned long, bool);
+   uint8_t cpuidle_prop;
+   uint8_t stop_version;

Why do we need both cpuidle_prop and stop_version ?


The idea is that each stop_version can house a multitude of overlapping
properties.
So the idea is to give a clean distinction. However, I can see now that the
versioning and properties could be embedded in a single bitmask



@@ -657,6 +659,9 @@ static void __init cpufeatures_setup_start(u32 isa)
}
  }

+stop_deps_t stop_dep = {NULL, 0x0, 0x0};
+EXPORT_SYMBOL(stop_dep);
+
  static bool __init cpufeatures_process_feature(struct dt_cpu_feature *f)
  {
const struct dt_cpu_feature_match *m;
diff --git a/arch/powerpc/platforms/powernv/idle.c 
b/arch/powerpc/platforms/powernv/idle.c
index 78599bca66c2..c32cdc37acf4 100644
--- a/arch/powerpc/platforms/powernv/idle.c
+++ b/arch/powerpc/platforms/powernv/idle.c
@@ -812,7 +812,7 @@ static unsigned long power9_offline_stop(unsigned long 
psscr)

  #ifndef CONFIG_KVM_BOOK3S_HV_POSSIBLE
__ppc64_runlatch_off();
-   srr1 = power9_idle_stop(psscr, true);
+   srr1 = stop_dep.idle_stop(psscr, true);
__ppc64_runlatch_on();
  #else
/*
@@ -828,7 +828,7 @@ static unsigned long power9_offline_stop(unsigned long 
psscr)
local_paca->kvm_hstate.hwthread_state = KVM_HWTHREAD_IN_IDLE;

__ppc64_runlatch_off();
-   srr1 = power9_idle_stop(psscr, false);
+   srr1 = stop_dep.idle_stop(psscr, true);
__ppc64_runlatch_on();

local_paca->kvm_hstate.hwthread_state = KVM_HWTHREAD_IN_KERNEL;
@@ -856,7 +856,7 @@ void power9_idle_type(unsigned long stop_psscr_val,
psscr = (psscr & ~stop_psscr_mask) | stop_psscr_val;

__ppc64_runlatch_off();
-   srr1 = power9_idle_stop(psscr, true);
+   srr1 = stop_dep.idle_stop(psscr, true);
__ppc64_runlatch_on();


There is one other place in arch/powerpc/kvm/book3s_hv_rmhandlers.S
where we call isa300_idle_stop_mayloss (this is kvm_nap_sequence).

So, if stop states are not supported, then, KVM subsystem should know
about it. Some KVM configurations depend on putting the secondary
threads of the core offline into an idle state whose wakeup is from
0x100 vector. Your patch doesn't address that part.


Sure, I'll make sure to address it there too.




goto out;
+   switch(stop_dep.stop_version) {
+   case STOP_VERSION_P9:
+   stop_dep.idle_stop = power9_idle_stop;
+   break;
+   default:
+   stop_dep.idle_stop = NULL;

You should add a pr_warn() here that stop state isn't supported
because the kernel doesn't know about the version.


Sure


Thanks
Pratik



Re: [RFC] Support stop state version quirk and firmware enabled stop

2020-04-12 Thread Pratik Sampat

Hello Gautham,

On 08/04/20 3:20 pm, Gautham R Shenoy wrote:

Hi Pratik,

On Wed, Mar 04, 2020 at 09:26:48PM +0530, Pratik Rajesh Sampat wrote:

A concept patch in Skiboot to illustrate the case wherein handling of
stop states for different DD versions of a CPU can be achieved by a
simple modification in the list of cpu_features.
As an example idle-stop1 is defined which uses P9_CPU_DD1 to define the
cpu feature.

Along with that, an implementation is being worked upon the LE OPAL
series which helps OPAL handle the stop state entry and exit.

This patch advertises this capability of the firmware which can be
availed if the quirk-version-setting is not cognizable.

The firmware-enabled stop is being worked by Abhishek Goel
 building upon the LE OPAL series.

Signed-off-by: Pratik Rajesh Sampat 
---
  core/cpufeatures.c | 22 ++
  1 file changed, 22 insertions(+)

diff --git a/core/cpufeatures.c b/core/cpufeatures.c
index ec30c975..b9875e7b 100644
--- a/core/cpufeatures.c
+++ b/core/cpufeatures.c
@@ -510,6 +510,25 @@ static const struct cpu_feature cpu_features_table[] = {
-1, -1, -1,
NULL, },

+   /*
+* QUIRK for ISAv3.0B stop idle instructions and registers
+* Helps us determine if there are any quirks
+* XXX: Same of idle-stop
+*/
+   { "idle-stop-v1",
+   CPU_P9_DD1,
+   ISA_V3_0B, USABLE_HV|USABLE_OS,
+   HV_CUSTOM, OS_CUSTOM,
+   -1, -1, -1,
+   NULL, },


So, at this point, we don't need any such quirk for any of the DD
version right ? This is to demonstrate that if say P9_DD1 had a quirk
w.r.t stop-state handling, then this is how we would advertise it to
the kernel.


Absolutely. A dummy property has been added to show how quirk handling
would work with stop states.


+
+   { "firmware-stop-supported",
+   CPU_P9,
+   ISA_V3_0B, USABLE_HV|USABLE_OS,
+   HV_CUSTOM, OS_CUSTOM,
+   -1, -1, -1,
+   NULL, },
+


I suppose this is for the opal-cpuidle driver support posted here:
https://lists.ozlabs.org/pipermail/skiboot/2020-April/016726.html


Right, this complements the usage of the opal-cpuidle driver.


/*
 * ISAv3.0B Hypervisor Virtualization Interrupt
 * Also associated system registers, LPCR EE, HEIC, HVICE,
@@ -883,6 +902,9 @@ static void add_cpufeatures(struct dt_node *cpus,
const struct cpu_feature *f = &cpu_features_table[i];

if (f->cpus_supported & cpu_feature_cpu) {
+   if (!strcmp(f->name, "firmware-stop-supported") &&
+   HAVE_BIG_ENDIAN)
+   continue;

In OPAL do we have an macro defining BIG_ENDIAN ? If yes, you could
wrap the "firmware-stop-supported" in cpu_features_table[] within
#ifndef BIG_ENDIAN. That way you won't need a special case here.


HAVE_BIG_ENDIAN is actually a macro. It's a good idea to wrap it in
the declaration itself.




DBG("  '%s'\n", f->name);
add_cpu_feature_nodeps(features, f);
}
--
2.24.1


--
Thanks and Regards
gautham.




Re: [PATCH v3 11/15] powerpc/64s: machine check interrupt update NMI accounting

2020-04-12 Thread kbuild test robot
Hi Nicholas,

I love your patch! Yet something to improve:

[auto build test ERROR on powerpc/next]
[also build test ERROR on next-20200412]
[cannot apply to tip/perf/core v5.6]
[if your patch is applied to the wrong git tree, please drop us a note to help
improve the system. BTW, we also suggest to use '--base' option to specify the
base tree in git format-patch, please see https://stackoverflow.com/a/37406982]

url:
https://github.com/0day-ci/linux/commits/Nicholas-Piggin/powerpc-64-machine-check-and-system-reset-fixes/20200407-134803
base:   https://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux.git next
config: powerpc-randconfig-a001-20200412 (attached as .config)
compiler: powerpc64-linux-gcc (GCC) 9.3.0
reproduce:
wget 
https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O 
~/bin/make.cross
chmod +x ~/bin/make.cross
# save the attached .config to linux build tree
GCC_VERSION=9.3.0 make.cross ARCH=powerpc 

If you fix the issue, kindly add following tag as appropriate
Reported-by: kbuild test robot 

All errors (new ones prefixed by >>):

   In file included from include/linux/kernel.h:15,
from include/asm-generic/bug.h:19,
from arch/powerpc/include/asm/bug.h:109,
from include/linux/bug.h:5,
from arch/powerpc/include/asm/mmu.h:130,
from arch/powerpc/include/asm/paca.h:18,
from arch/powerpc/include/asm/current.h:13,
from include/linux/sched.h:12,
from arch/powerpc/kernel/process.c:14:
   arch/powerpc/kernel/process.c: In function 'show_regs':
>> arch/powerpc/kernel/process.c:1424:74: error: 'struct paca_struct' has no 
>> member named 'in_nmi'
1424 |  pr_cont("IRQMASK: %lx IN_NMI:%d IN_MCE:%d", regs->softe, 
(int)get_paca()->in_nmi, (int)get_paca()->in_mce);
 |  
^~
   include/linux/printk.h:317:26: note: in definition of macro 'pr_cont'
 317 |  printk(KERN_CONT fmt, ##__VA_ARGS__)
 |  ^~~
>> arch/powerpc/kernel/process.c:1424:99: error: 'struct paca_struct' has no 
>> member named 'in_mce'
1424 |  pr_cont("IRQMASK: %lx IN_NMI:%d IN_MCE:%d", regs->softe, 
(int)get_paca()->in_nmi, (int)get_paca()->in_mce);
 |  
 ^~
   include/linux/printk.h:317:26: note: in definition of macro 'pr_cont'
 317 |  printk(KERN_CONT fmt, ##__VA_ARGS__)
 |  ^~~

vim +1424 arch/powerpc/kernel/process.c

  1400  
  1401  void show_regs(struct pt_regs * regs)
  1402  {
  1403  int i, trap;
  1404  
  1405  show_regs_print_info(KERN_DEFAULT);
  1406  
  1407  printk("NIP:  "REG" LR: "REG" CTR: "REG"\n",
  1408 regs->nip, regs->link, regs->ctr);
  1409  printk("REGS: %px TRAP: %04lx   %s  (%s)\n",
  1410 regs, regs->trap, print_tainted(), 
init_utsname()->release);
  1411  printk("MSR:  "REG" ", regs->msr);
  1412  print_msr_bits(regs->msr);
  1413  pr_cont("  CR: %08lx  XER: %08lx\n", regs->ccr, regs->xer);
  1414  trap = TRAP(regs);
  1415  if ((TRAP(regs) != 0xc00) && cpu_has_feature(CPU_FTR_CFAR))
  1416  pr_cont("CFAR: "REG" ", regs->orig_gpr3);
  1417  if (trap == 0x200 || trap == 0x300 || trap == 0x600)
  1418  #if defined(CONFIG_4xx) || defined(CONFIG_BOOKE)
  1419  pr_cont("DEAR: "REG" ESR: "REG" ", regs->dar, 
regs->dsisr);
  1420  #else
  1421  pr_cont("DAR: "REG" DSISR: %08lx ", regs->dar, 
regs->dsisr);
  1422  #endif
  1423  #ifdef CONFIG_PPC64
> 1424  pr_cont("IRQMASK: %lx IN_NMI:%d IN_MCE:%d", regs->softe, 
> (int)get_paca()->in_nmi, (int)get_paca()->in_mce);
  1425  #endif
  1426  #ifdef CONFIG_PPC_TRANSACTIONAL_MEM
  1427  if (MSR_TM_ACTIVE(regs->msr))
  1428  pr_cont("\nPACATMSCRATCH: %016llx ", 
get_paca()->tm_scratch);
  1429  #endif
  1430  
  1431  for (i = 0;  i < 32;  i++) {
  1432  if ((i % REGS_PER_LINE) == 0)
  1433  pr_cont("\nGPR%02d: ", i);
  1434  pr_cont(REG " ", regs->gpr[i]);
  1435  if (i == LAST_VOLATILE && !FULL_REGS(regs))
  1436  break;
  1437  }
  1438  pr_cont("\n");
  1439  #ifdef CONFIG_KALLSYMS
  1440  /*
  1441   * Look