Re: [PATCH v4 06/11] riscv: mm: Add memory hotplugging support
On Wed, Jun 5, 2024 at 1:41 PM Björn Töpel wrote: > > From: Björn Töpel > > For an architecture to support memory hotplugging, a couple of > callbacks need to be implemented: > > arch_add_memory() > This callback is responsible for adding the physical memory into the > direct map, and for calling into the memory hotplugging generic code via > __add_pages(), which adds the corresponding struct page entries and > updates the vmemmap mapping. > > arch_remove_memory() > This is the inverse of the callback above. > > vmemmap_free() > This function tears down the vmemmap mappings (if > CONFIG_SPARSEMEM_VMEMMAP is enabled), and also deallocates the > backing vmemmap pages. Note that for persistent memory, an > alternative allocator for the backing pages can be used: the > vmem_altmap. This means that when the backing pages are cleared, > extra care is needed so that the correct deallocation method is > used. > > arch_get_mappable_range() > This function returns the PA range that the direct map can map. > Used by the MHP internals for sanity checks. > > The page table unmap/teardown functions are heavily based on code from > the x86 tree. The same remove_pgd_mapping() function is used in both > vmemmap_free() and arch_remove_memory(), but in the latter function > the backing pages are not removed. 
> > Signed-off-by: Björn Töpel > --- > arch/riscv/mm/init.c | 267 +++ > 1 file changed, 267 insertions(+) > > diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c > index 1f7e7c223bec..bfa2dea95354 100644 > --- a/arch/riscv/mm/init.c > +++ b/arch/riscv/mm/init.c > @@ -1534,3 +1534,270 @@ struct execmem_info __init *execmem_arch_setup(void) > } > #endif /* CONFIG_MMU */ > #endif /* CONFIG_EXECMEM */ > + > +#ifdef CONFIG_MEMORY_HOTPLUG > +static void __meminit free_pte_table(pte_t *pte_start, pmd_t *pmd) > +{ > + struct page *page = pmd_page(*pmd); > + struct ptdesc *ptdesc = page_ptdesc(page); > + pte_t *pte; > + int i; > + > + for (i = 0; i < PTRS_PER_PTE; i++) { > + pte = pte_start + i; > + if (!pte_none(*pte)) > + return; > + } > + > + pagetable_pte_dtor(ptdesc); > + if (PageReserved(page)) > + free_reserved_page(page); > + else > + pagetable_free(ptdesc); > + pmd_clear(pmd); > +} > + > +static void __meminit free_pmd_table(pmd_t *pmd_start, pud_t *pud) > +{ > + struct page *page = pud_page(*pud); > + struct ptdesc *ptdesc = page_ptdesc(page); > + pmd_t *pmd; > + int i; > + > + for (i = 0; i < PTRS_PER_PMD; i++) { > + pmd = pmd_start + i; > + if (!pmd_none(*pmd)) > + return; > + } > + > + pagetable_pmd_dtor(ptdesc); > + if (PageReserved(page)) > + free_reserved_page(page); > + else > + pagetable_free(ptdesc); > + pud_clear(pud); > +} > + > +static void __meminit free_pud_table(pud_t *pud_start, p4d_t *p4d) > +{ > + struct page *page = p4d_page(*p4d); > + pud_t *pud; > + int i; > + > + for (i = 0; i < PTRS_PER_PUD; i++) { > + pud = pud_start + i; > + if (!pud_none(*pud)) > + return; > + } > + > + if (PageReserved(page)) > + free_reserved_page(page); > + else > + free_pages((unsigned long)page_address(page), 0); > + p4d_clear(p4d); > +} > + > +static void __meminit free_vmemmap_storage(struct page *page, size_t size, > + struct vmem_altmap *altmap) > +{ > + int order = get_order(size); > + > + if (altmap) { > + vmem_altmap_free(altmap, size >> PAGE_SHIFT); > + 
return; > + } > + > + if (PageReserved(page)) { > + unsigned int nr_pages = 1 << order; > + > + while (nr_pages--) > + free_reserved_page(page++); > + return; > + } > + > + free_pages((unsigned long)page_address(page), order); > +} > + > +static void __meminit remove_pte_mapping(pte_t *pte_base, unsigned long > addr, unsigned long end, > +bool is_vmemmap, struct vmem_altmap > *altmap) > +{ > + unsigned long next; > + pte_t *ptep, pte; > + > + for (; addr < end; addr = next) { > + next = (addr + PAGE_SIZE) & PAGE_MASK; > + if (next > end) > + next = end; > + > + ptep = pte_base + pte_index(addr); > + pte = ptep_get(ptep); > + if (!pte_present(*ptep)) > + continue; > + > + pte_clear(&init_mm, addr, ptep); > + if (is_vmemmap) > + free_vmemmap_storage(pte_page(pte), PAGE_SIZE, > altmap); > + } > +} > + >
[PATCH v4 06/11] riscv: mm: Add memory hotplugging support
From: Björn Töpel For an architecture to support memory hotplugging, a couple of callbacks need to be implemented: arch_add_memory() This callback is responsible for adding the physical memory into the direct map, and for calling into the memory hotplugging generic code via __add_pages(), which adds the corresponding struct page entries and updates the vmemmap mapping. arch_remove_memory() This is the inverse of the callback above. vmemmap_free() This function tears down the vmemmap mappings (if CONFIG_SPARSEMEM_VMEMMAP is enabled), and also deallocates the backing vmemmap pages. Note that for persistent memory, an alternative allocator for the backing pages can be used: the vmem_altmap. This means that when the backing pages are cleared, extra care is needed so that the correct deallocation method is used. arch_get_mappable_range() This function returns the PA range that the direct map can map. Used by the MHP internals for sanity checks. The page table unmap/teardown functions are heavily based on code from the x86 tree. The same remove_pgd_mapping() function is used in both vmemmap_free() and arch_remove_memory(), but in the latter function the backing pages are not removed. 
Signed-off-by: Björn Töpel --- arch/riscv/mm/init.c | 267 +++ 1 file changed, 267 insertions(+) diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c index 1f7e7c223bec..bfa2dea95354 100644 --- a/arch/riscv/mm/init.c +++ b/arch/riscv/mm/init.c @@ -1534,3 +1534,270 @@ struct execmem_info __init *execmem_arch_setup(void) } #endif /* CONFIG_MMU */ #endif /* CONFIG_EXECMEM */ + +#ifdef CONFIG_MEMORY_HOTPLUG +static void __meminit free_pte_table(pte_t *pte_start, pmd_t *pmd) +{ + struct page *page = pmd_page(*pmd); + struct ptdesc *ptdesc = page_ptdesc(page); + pte_t *pte; + int i; + + for (i = 0; i < PTRS_PER_PTE; i++) { + pte = pte_start + i; + if (!pte_none(*pte)) + return; + } + + pagetable_pte_dtor(ptdesc); + if (PageReserved(page)) + free_reserved_page(page); + else + pagetable_free(ptdesc); + pmd_clear(pmd); +} + +static void __meminit free_pmd_table(pmd_t *pmd_start, pud_t *pud) +{ + struct page *page = pud_page(*pud); + struct ptdesc *ptdesc = page_ptdesc(page); + pmd_t *pmd; + int i; + + for (i = 0; i < PTRS_PER_PMD; i++) { + pmd = pmd_start + i; + if (!pmd_none(*pmd)) + return; + } + + pagetable_pmd_dtor(ptdesc); + if (PageReserved(page)) + free_reserved_page(page); + else + pagetable_free(ptdesc); + pud_clear(pud); +} + +static void __meminit free_pud_table(pud_t *pud_start, p4d_t *p4d) +{ + struct page *page = p4d_page(*p4d); + pud_t *pud; + int i; + + for (i = 0; i < PTRS_PER_PUD; i++) { + pud = pud_start + i; + if (!pud_none(*pud)) + return; + } + + if (PageReserved(page)) + free_reserved_page(page); + else + free_pages((unsigned long)page_address(page), 0); + p4d_clear(p4d); +} + +static void __meminit free_vmemmap_storage(struct page *page, size_t size, + struct vmem_altmap *altmap) +{ + int order = get_order(size); + + if (altmap) { + vmem_altmap_free(altmap, size >> PAGE_SHIFT); + return; + } + + if (PageReserved(page)) { + unsigned int nr_pages = 1 << order; + + while (nr_pages--) + free_reserved_page(page++); + return; + } + + 
free_pages((unsigned long)page_address(page), order); +} + +static void __meminit remove_pte_mapping(pte_t *pte_base, unsigned long addr, unsigned long end, +bool is_vmemmap, struct vmem_altmap *altmap) +{ + unsigned long next; + pte_t *ptep, pte; + + for (; addr < end; addr = next) { + next = (addr + PAGE_SIZE) & PAGE_MASK; + if (next > end) + next = end; + + ptep = pte_base + pte_index(addr); + pte = ptep_get(ptep); + if (!pte_present(*ptep)) + continue; + + pte_clear(&init_mm, addr, ptep); + if (is_vmemmap) + free_vmemmap_storage(pte_page(pte), PAGE_SIZE, altmap); + } +} + +static void __meminit remove_pmd_mapping(pmd_t *pmd_base, unsigned long addr, unsigned long end, +bool is_vmemmap, struct vmem_altmap *altmap) +{ + unsigned long next; + pte_t *pte_base; + pmd_t *pmdp, pmd; + + for (; addr < end; addr = next) { + next = pmd_addr_end(addr, end); +