Implement page mapping percpu first chunk allocator as a fallback to
the embedding allocator. With 4K hash translation we limit our page
table range to 64TB and commit: 0034d395f89d ("powerpc/mm/hash64: Map all the
kernel regions in the same 0xc range") moved all kernel mapping to
that 64TB range. In-order to support sparse memory layout we need
to increase our linear mapping space and reduce other mappings.

With such a layout percpu embedded first chunk allocator will fail
because of small vmalloc range. Add a fallback to page mapping
percpu first chunk allocator for such failures.

The below dmesg output can be observed in such case.

 percpu: max_distance=0x1ffffef00000 too large for vmalloc space 0x10000000000
 PERCPU: auto allocator failed (-22), falling back to page size
 percpu: 40 4K pages/cpu s148816 r0 d15024

Signed-off-by: Aneesh Kumar K.V <aneesh.ku...@linux.ibm.com>
---
 arch/powerpc/Kconfig           |  5 ++-
 arch/powerpc/kernel/setup_64.c | 62 ++++++++++++++++++++++++++++++++--
 2 files changed, 63 insertions(+), 4 deletions(-)

diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 9fa23eb320ff..820365c42065 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -59,7 +59,10 @@ config HAVE_SETUP_PER_CPU_AREA
        def_bool PPC64
 
 config NEED_PER_CPU_EMBED_FIRST_CHUNK
-       def_bool PPC64
+       def_bool y if PPC64
+
+config NEED_PER_CPU_PAGE_FIRST_CHUNK
+       def_bool y if PPC64
 
 config NR_IRQS
        int "Number of virtual interrupt numbers"
diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c
index eaddd53a0e13..6090d8290561 100644
--- a/arch/powerpc/kernel/setup_64.c
+++ b/arch/powerpc/kernel/setup_64.c
@@ -66,6 +66,7 @@
 #include <asm/feature-fixups.h>
 #include <asm/kup.h>
 #include <asm/early_ioremap.h>
+#include <asm/pgalloc.h>
 
 #include "setup.h"
 
@@ -808,13 +809,58 @@ static int pcpu_cpu_distance(unsigned int from, unsigned 
int to)
 unsigned long __per_cpu_offset[NR_CPUS] __read_mostly;
 EXPORT_SYMBOL(__per_cpu_offset);
 
+static void __init pcpu_populate_pte(unsigned long addr)
+{
+       pgd_t *pgd = pgd_offset_k(addr);
+       p4d_t *p4d;
+       pud_t *pud;
+       pmd_t *pmd;
+
+       p4d = p4d_offset(pgd, addr);
+       if (p4d_none(*p4d)) {
+               pud_t *new;
+
+               new = memblock_alloc(PUD_TABLE_SIZE, PUD_TABLE_SIZE);
+               if (!new)
+                       goto err_alloc;
+               p4d_populate(&init_mm, p4d, new);
+       }
+
+       pud = pud_offset(p4d, addr);
+       if (pud_none(*pud)) {
+               pmd_t *new;
+
+               new = memblock_alloc(PMD_TABLE_SIZE, PMD_TABLE_SIZE);
+               if (!new)
+                       goto err_alloc;
+               pud_populate(&init_mm, pud, new);
+       }
+
+       pmd = pmd_offset(pud, addr);
+       if (!pmd_present(*pmd)) {
+               pte_t *new;
+
+               new = memblock_alloc(PTE_TABLE_SIZE, PTE_TABLE_SIZE);
+               if (!new)
+                       goto err_alloc;
+               pmd_populate_kernel(&init_mm, pmd, new);
+       }
+
+       return;
+
+err_alloc:
+       panic("%s: Failed to allocate %lu bytes align=%lx from=%lx\n",
+             __func__, PAGE_SIZE, PAGE_SIZE, PAGE_SIZE);
+}
+
+
 void __init setup_per_cpu_areas(void)
 {
        const size_t dyn_size = PERCPU_MODULE_RESERVE + PERCPU_DYNAMIC_RESERVE;
        size_t atom_size;
        unsigned long delta;
        unsigned int cpu;
-       int rc;
+       int rc = -EINVAL;
 
        /*
         * Linear mapping is one of 4K, 1M and 16M.  For 4K, no need
@@ -826,8 +872,18 @@ void __init setup_per_cpu_areas(void)
        else
                atom_size = 1 << 20;
 
-       rc = pcpu_embed_first_chunk(0, dyn_size, atom_size, pcpu_cpu_distance,
-                                   pcpu_alloc_bootmem, pcpu_free_bootmem);
+       if (pcpu_chosen_fc != PCPU_FC_PAGE) {
+               rc = pcpu_embed_first_chunk(0, dyn_size, atom_size, 
pcpu_cpu_distance,
+                                           pcpu_alloc_bootmem, 
pcpu_free_bootmem);
+               if (rc)
+                       pr_warn("PERCPU: %s allocator failed (%d), "
+                               "falling back to page size\n",
+                               pcpu_fc_names[pcpu_chosen_fc], rc);
+       }
+
+       if (rc < 0)
+               rc = pcpu_page_first_chunk(0, pcpu_alloc_bootmem, 
pcpu_free_bootmem,
+                                          pcpu_populate_pte);
        if (rc < 0)
                panic("cannot initialize percpu area (err=%d)", rc);
 
-- 
2.26.2

Reply via email to