Re: [PATCH v7 09/14] arm64/numa: support HAVE_SETUP_PER_CPU_AREA
On 2016/8/26 21:28, Will Deacon wrote:
> On Wed, Aug 24, 2016 at 03:44:48PM +0800, Zhen Lei wrote:
>> To make each percpu area allocated from its local numa node. Without this
>> patch, all percpu areas will be allocated from the node which cpu0 belongs
>> to.
>>
>> Signed-off-by: Zhen Lei
>> ---
>>  arch/arm64/Kconfig   |  8 ++++++++
>>  arch/arm64/mm/numa.c | 55 +++++++++++++++++++++++++++++++++++++++++++++++
>>  2 files changed, 63 insertions(+)
>>
>> diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
>> index bc3f00f..2815af6 100644
>> --- a/arch/arm64/Kconfig
>> +++ b/arch/arm64/Kconfig
>> @@ -603,6 +603,14 @@ config USE_PERCPU_NUMA_NODE_ID
>>  	def_bool y
>>  	depends on NUMA
>>
>> +config HAVE_SETUP_PER_CPU_AREA
>> +	def_bool y
>> +	depends on NUMA
>> +
>> +config NEED_PER_CPU_EMBED_FIRST_CHUNK
>> +	def_bool y
>> +	depends on NUMA
>
> Why do we need this? Is it purely about using block mappings for the
> pcpu area?

Without NEED_PER_CPU_EMBED_FIRST_CHUNK, a link error will be reported,
because pcpu_embed_first_chunk() is only built when that option (or
!HAVE_SETUP_PER_CPU_AREA) is set, see mm/percpu.c:

#if defined(CONFIG_NEED_PER_CPU_EMBED_FIRST_CHUNK) || \
	!defined(CONFIG_HAVE_SETUP_PER_CPU_AREA)
#define BUILD_EMBED_FIRST_CHUNK
#endif

#if defined(BUILD_EMBED_FIRST_CHUNK)
//pcpu_embed_first_chunk definition
#endif

setup_per_cpu_areas()
	--> pcpu_embed_first_chunk()

>
>>  source kernel/Kconfig.preempt
>>  source kernel/Kconfig.hz
>>
>> diff --git a/arch/arm64/mm/numa.c b/arch/arm64/mm/numa.c
>> index 7b73808..5e44ad1 100644
>> --- a/arch/arm64/mm/numa.c
>> +++ b/arch/arm64/mm/numa.c
>> @@ -26,6 +26,7 @@
>>  #include
>>
>>  #include
>> +#include
>>
>>  struct pglist_data *node_data[MAX_NUMNODES] __read_mostly;
>>  EXPORT_SYMBOL(node_data);
>> @@ -131,6 +132,60 @@ void __init early_map_cpu_to_node(unsigned int cpu, int nid)
>>  	cpu_to_node_map[cpu] = nid;
>>  }
>>
>> +#ifdef CONFIG_HAVE_SETUP_PER_CPU_AREA
>> +unsigned long __per_cpu_offset[NR_CPUS] __read_mostly;
>> +EXPORT_SYMBOL(__per_cpu_offset);
>> +
>> +static int __init early_cpu_to_node(int cpu)
>> +{
>> +	return cpu_to_node_map[cpu];
>> +}
>> +
>> +static int __init pcpu_cpu_distance(unsigned int from, unsigned int to)
>> +{
>> +	if (early_cpu_to_node(from) == early_cpu_to_node(to))
>> +		return LOCAL_DISTANCE;
>> +	else
>> +		return REMOTE_DISTANCE;
>> +}
>
> Is it too early to use __node_distance here?

Good, we can use node_distance() directly here, thanks (see the sketch
at the end of this mail).

>
>> +static void * __init pcpu_fc_alloc(unsigned int cpu, size_t size,
>> +				   size_t align)
>> +{
>> +	int nid = early_cpu_to_node(cpu);
>> +
>> +	return memblock_virt_alloc_try_nid(size, align,
>> +			__pa(MAX_DMA_ADDRESS), MEMBLOCK_ALLOC_ACCESSIBLE, nid);
>> +}
>> +
>> +static void __init pcpu_fc_free(void *ptr, size_t size)
>> +{
>> +	memblock_free_early(__pa(ptr), size);
>> +}
>> +
>> +void __init setup_per_cpu_areas(void)
>> +{
>> +	unsigned long delta;
>> +	unsigned int cpu;
>> +	int rc;
>> +
>> +	/*
>> +	 * Always reserve area for module percpu variables. That's
>> +	 * what the legacy allocator did.
>> +	 */
>> +	rc = pcpu_embed_first_chunk(PERCPU_MODULE_RESERVE,
>> +				    PERCPU_DYNAMIC_RESERVE, PAGE_SIZE,
>> +				    pcpu_cpu_distance,
>> +				    pcpu_fc_alloc, pcpu_fc_free);
>> +	if (rc < 0)
>> +		panic("Failed to initialize percpu areas.");
>> +
>> +	delta = (unsigned long)pcpu_base_addr - (unsigned long)__per_cpu_start;
>> +	for_each_possible_cpu(cpu)
>> +		__per_cpu_offset[cpu] = delta + pcpu_unit_offsets[cpu];
>> +}
>> +#endif
>
> It's a pity that this is practically identical to PowerPC. Ideally, there
> would be definitions of this initialisation gunk in the core code that
> could be reused across architectures.

But these are different from the other architectures, except PPC.
I originally wanted to put it into drivers/of/of_numa.c, but now ACPI
NUMA support is coming, so I don't know where it should live.

>
> Will
>
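For reference, a rough sketch of what I have in mind for the next
version (untested, and assuming node_distance() already returns sane
values this early in boot, i.e. after the NUMA init done in setup_arch()):

static int __init pcpu_cpu_distance(unsigned int from, unsigned int to)
{
	/*
	 * If firmware provided no distance table, node_distance()
	 * should fall back to LOCAL_DISTANCE/REMOTE_DISTANCE, so this
	 * matches the behaviour of the open-coded version above.
	 */
	return node_distance(early_cpu_to_node(from),
			     early_cpu_to_node(to));
}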
Re: [PATCH v7 09/14] arm64/numa: support HAVE_SETUP_PER_CPU_AREA
On Wed, Aug 24, 2016 at 03:44:48PM +0800, Zhen Lei wrote:
> To make each percpu area allocated from its local numa node. Without this
> patch, all percpu areas will be allocated from the node which cpu0 belongs
> to.
>
> Signed-off-by: Zhen Lei
> ---
>  arch/arm64/Kconfig   |  8 ++++++++
>  arch/arm64/mm/numa.c | 55 +++++++++++++++++++++++++++++++++++++++++++++++
>  2 files changed, 63 insertions(+)
>
> diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
> index bc3f00f..2815af6 100644
> --- a/arch/arm64/Kconfig
> +++ b/arch/arm64/Kconfig
> @@ -603,6 +603,14 @@ config USE_PERCPU_NUMA_NODE_ID
>  	def_bool y
>  	depends on NUMA
>
> +config HAVE_SETUP_PER_CPU_AREA
> +	def_bool y
> +	depends on NUMA
> +
> +config NEED_PER_CPU_EMBED_FIRST_CHUNK
> +	def_bool y
> +	depends on NUMA

Why do we need this? Is it purely about using block mappings for the
pcpu area?

>  source kernel/Kconfig.preempt
>  source kernel/Kconfig.hz
>
> diff --git a/arch/arm64/mm/numa.c b/arch/arm64/mm/numa.c
> index 7b73808..5e44ad1 100644
> --- a/arch/arm64/mm/numa.c
> +++ b/arch/arm64/mm/numa.c
> @@ -26,6 +26,7 @@
>  #include
>
>  #include
> +#include
>
>  struct pglist_data *node_data[MAX_NUMNODES] __read_mostly;
>  EXPORT_SYMBOL(node_data);
> @@ -131,6 +132,60 @@ void __init early_map_cpu_to_node(unsigned int cpu, int nid)
>  	cpu_to_node_map[cpu] = nid;
>  }
>
> +#ifdef CONFIG_HAVE_SETUP_PER_CPU_AREA
> +unsigned long __per_cpu_offset[NR_CPUS] __read_mostly;
> +EXPORT_SYMBOL(__per_cpu_offset);
> +
> +static int __init early_cpu_to_node(int cpu)
> +{
> +	return cpu_to_node_map[cpu];
> +}
> +
> +static int __init pcpu_cpu_distance(unsigned int from, unsigned int to)
> +{
> +	if (early_cpu_to_node(from) == early_cpu_to_node(to))
> +		return LOCAL_DISTANCE;
> +	else
> +		return REMOTE_DISTANCE;
> +}

Is it too early to use __node_distance here?

> +static void * __init pcpu_fc_alloc(unsigned int cpu, size_t size,
> +				   size_t align)
> +{
> +	int nid = early_cpu_to_node(cpu);
> +
> +	return memblock_virt_alloc_try_nid(size, align,
> +			__pa(MAX_DMA_ADDRESS), MEMBLOCK_ALLOC_ACCESSIBLE, nid);
> +}
> +
> +static void __init pcpu_fc_free(void *ptr, size_t size)
> +{
> +	memblock_free_early(__pa(ptr), size);
> +}
> +
> +void __init setup_per_cpu_areas(void)
> +{
> +	unsigned long delta;
> +	unsigned int cpu;
> +	int rc;
> +
> +	/*
> +	 * Always reserve area for module percpu variables. That's
> +	 * what the legacy allocator did.
> +	 */
> +	rc = pcpu_embed_first_chunk(PERCPU_MODULE_RESERVE,
> +				    PERCPU_DYNAMIC_RESERVE, PAGE_SIZE,
> +				    pcpu_cpu_distance,
> +				    pcpu_fc_alloc, pcpu_fc_free);
> +	if (rc < 0)
> +		panic("Failed to initialize percpu areas.");
> +
> +	delta = (unsigned long)pcpu_base_addr - (unsigned long)__per_cpu_start;
> +	for_each_possible_cpu(cpu)
> +		__per_cpu_offset[cpu] = delta + pcpu_unit_offsets[cpu];
> +}
> +#endif

It's a pity that this is practically identical to PowerPC. Ideally, there
would be definitions of this initialisation gunk in the core code that
could be reused across architectures.

Will
[PATCH v7 09/14] arm64/numa: support HAVE_SETUP_PER_CPU_AREA
To make each percpu area allocated from its local numa node. Without this
patch, all percpu areas will be allocated from the node which cpu0 belongs
to.

Signed-off-by: Zhen Lei
---
 arch/arm64/Kconfig   |  8 ++++++++
 arch/arm64/mm/numa.c | 55 +++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 63 insertions(+)

diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index bc3f00f..2815af6 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -603,6 +603,14 @@ config USE_PERCPU_NUMA_NODE_ID
 	def_bool y
 	depends on NUMA

+config HAVE_SETUP_PER_CPU_AREA
+	def_bool y
+	depends on NUMA
+
+config NEED_PER_CPU_EMBED_FIRST_CHUNK
+	def_bool y
+	depends on NUMA
+
 source kernel/Kconfig.preempt
 source kernel/Kconfig.hz

diff --git a/arch/arm64/mm/numa.c b/arch/arm64/mm/numa.c
index 7b73808..5e44ad1 100644
--- a/arch/arm64/mm/numa.c
+++ b/arch/arm64/mm/numa.c
@@ -26,6 +26,7 @@
 #include

 #include
+#include

 struct pglist_data *node_data[MAX_NUMNODES] __read_mostly;
 EXPORT_SYMBOL(node_data);
@@ -131,6 +132,60 @@ void __init early_map_cpu_to_node(unsigned int cpu, int nid)
 	cpu_to_node_map[cpu] = nid;
 }

+#ifdef CONFIG_HAVE_SETUP_PER_CPU_AREA
+unsigned long __per_cpu_offset[NR_CPUS] __read_mostly;
+EXPORT_SYMBOL(__per_cpu_offset);
+
+static int __init early_cpu_to_node(int cpu)
+{
+	return cpu_to_node_map[cpu];
+}
+
+static int __init pcpu_cpu_distance(unsigned int from, unsigned int to)
+{
+	if (early_cpu_to_node(from) == early_cpu_to_node(to))
+		return LOCAL_DISTANCE;
+	else
+		return REMOTE_DISTANCE;
+}
+
+static void * __init pcpu_fc_alloc(unsigned int cpu, size_t size,
+				   size_t align)
+{
+	int nid = early_cpu_to_node(cpu);
+
+	return memblock_virt_alloc_try_nid(size, align,
+			__pa(MAX_DMA_ADDRESS), MEMBLOCK_ALLOC_ACCESSIBLE, nid);
+}
+
+static void __init pcpu_fc_free(void *ptr, size_t size)
+{
+	memblock_free_early(__pa(ptr), size);
+}
+
+void __init setup_per_cpu_areas(void)
+{
+	unsigned long delta;
+	unsigned int cpu;
+	int rc;
+
+	/*
+	 * Always reserve area for module percpu variables. That's
+	 * what the legacy allocator did.
+	 */
+	rc = pcpu_embed_first_chunk(PERCPU_MODULE_RESERVE,
+				    PERCPU_DYNAMIC_RESERVE, PAGE_SIZE,
+				    pcpu_cpu_distance,
+				    pcpu_fc_alloc, pcpu_fc_free);
+	if (rc < 0)
+		panic("Failed to initialize percpu areas.");
+
+	delta = (unsigned long)pcpu_base_addr - (unsigned long)__per_cpu_start;
+	for_each_possible_cpu(cpu)
+		__per_cpu_offset[cpu] = delta + pcpu_unit_offsets[cpu];
+}
+#endif
+
 /**
  * numa_add_memblk - Set node id to memblk
  * @nid: NUMA node ID of the new memblk
--
2.5.0
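Background note (not part of the patch): the offsets computed at the end
of setup_per_cpu_areas() are what the per-CPU accessors add to a percpu
symbol's link-time address to reach a given CPU's copy. Simplified from
the generic percpu machinery (the real helpers are per_cpu_ptr() and
SHIFT_PERCPU_PTR(); the macro name below is made up for illustration):

/* Illustration only: resolve a percpu pointer for a given CPU. */
#define illustrative_per_cpu_ptr(ptr, cpu) \
	((typeof(ptr))((unsigned long)(ptr) + __per_cpu_offset[cpu]))

So delta relocates the linked .data..percpu section to the first chunk
that pcpu_embed_first_chunk() allocated, and pcpu_unit_offsets[cpu]
selects each CPU's (now NUMA-local) unit within that chunk.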