From: Björn Töpel <bj...@rivosinc.com>

For an architecture to support memory hotplugging, a number of
callbacks need to be implemented (a rough usage sketch follows the
list):

 arch_add_memory()
  This callback is responsible for adding the physical memory to the
  direct map, and for calling into the generic memory hotplug code via
  __add_pages(), which adds the corresponding struct page entries and
  updates the vmemmap mapping.

 arch_remove_memory()
  This is the inverse of the callback above.

 vmemmap_free()
  This function tears down the vmemmap mappings (if
  CONFIG_SPARSEMEM_VMEMMAP is enabled), and also deallocates the
  backing vmemmap pages. Note that for persistent memory, an
  alternative allocator for the backing pages can be used: the
  vmem_altmap. This means that when the backing pages are freed, extra
  care is needed so that the correct deallocation method is used.

 arch_get_mappable_range()
  This function returns the PA range that the direct map can map. It
  is used by the MHP internals for sanity checks.
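
To see where these callbacks sit in the overall flow, here is a rough
usage sketch from the caller's side (e.g. a memory device driver). The
add_memory()/offline_and_remove_memory() prototypes are taken from
include/linux/memory_hotplug.h at the time of writing and may differ
between kernel versions, so treat the snippet as illustrative rather
than authoritative:

  #include <linux/memory_hotplug.h>

  /* Illustrative only: hot-add and then hot-remove [start, start + size). */
  static int example_hotplug(int nid, u64 start, u64 size)
  {
          int rc;

          /*
           * The generic MHP core ends up calling arch_add_memory(),
           * which creates the direct mapping and calls __add_pages()
           * to populate the struct page entries and the vmemmap.
           */
          rc = add_memory(nid, start, size, MHP_NONE);
          if (rc)
                  return rc;

          /*
           * The reverse path goes through arch_remove_memory() and,
           * for the struct page backing, vmemmap_free().
           */
          return offline_and_remove_memory(start, size);
  }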

The page table unmap/teardown functions are heavily based on code from
the x86 tree. The same remove_pgd_mapping() function is used in both
vmemmap_free() and arch_remove_memory(), but in the latter function
the backing pages are not removed.

Signed-off-by: Björn Töpel <bj...@rivosinc.com>
---
 arch/riscv/mm/init.c | 261 +++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 261 insertions(+)

diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c
index 6f72b0b2b854..6693b742bf2f 100644
--- a/arch/riscv/mm/init.c
+++ b/arch/riscv/mm/init.c
@@ -1493,3 +1493,264 @@ void __init pgtable_cache_init(void)
        }
 }
 #endif
+
+#ifdef CONFIG_MEMORY_HOTPLUG
+static void __meminit free_pagetable(struct page *page, int order)
+{
+       unsigned int nr_pages = 1 << order;
+
+       /*
+        * vmemmap/direct page tables can be reserved, if added at
+        * boot.
+        */
+       if (PageReserved(page)) {
+               __ClearPageReserved(page);
+               while (nr_pages--)
+                       free_reserved_page(page++);
+               return;
+       }
+
+       free_pages((unsigned long)page_address(page), order);
+}
+
+static void __meminit free_pte_table(pte_t *pte_start, pmd_t *pmd)
+{
+       pte_t *pte;
+       int i;
+
+       for (i = 0; i < PTRS_PER_PTE; i++) {
+               pte = pte_start + i;
+               if (!pte_none(*pte))
+                       return;
+       }
+
+       free_pagetable(pmd_page(*pmd), 0);
+       pmd_clear(pmd);
+}
+
+static void __meminit free_pmd_table(pmd_t *pmd_start, pud_t *pud)
+{
+       pmd_t *pmd;
+       int i;
+
+       for (i = 0; i < PTRS_PER_PMD; i++) {
+               pmd = pmd_start + i;
+               if (!pmd_none(*pmd))
+                       return;
+       }
+
+       free_pagetable(pud_page(*pud), 0);
+       pud_clear(pud);
+}
+
+static void __meminit free_pud_table(pud_t *pud_start, p4d_t *p4d)
+{
+       pud_t *pud;
+       int i;
+
+       for (i = 0; i < PTRS_PER_PUD; i++) {
+               pud = pud_start + i;
+               if (!pud_none(*pud))
+                       return;
+       }
+
+       free_pagetable(p4d_page(*p4d), 0);
+       p4d_clear(p4d);
+}
+
+static void __meminit free_vmemmap_storage(struct page *page, size_t size,
+                                          struct vmem_altmap *altmap)
+{
+       if (altmap)
+               vmem_altmap_free(altmap, size >> PAGE_SHIFT);
+       else
+               free_pagetable(page, get_order(size));
+}
+
+static void __meminit remove_pte_mapping(pte_t *pte_base, unsigned long addr, unsigned long end,
+                                        bool is_vmemmap, struct vmem_altmap *altmap)
+{
+       unsigned long next;
+       pte_t *ptep, pte;
+
+       for (; addr < end; addr = next) {
+               next = (addr + PAGE_SIZE) & PAGE_MASK;
+               if (next > end)
+                       next = end;
+
+               ptep = pte_base + pte_index(addr);
+               pte = READ_ONCE(*ptep);
+
+               if (!pte_present(pte))
+                       continue;
+
+               pte_clear(&init_mm, addr, ptep);
+               if (is_vmemmap)
+                       free_vmemmap_storage(pte_page(pte), PAGE_SIZE, altmap);
+       }
+}
+
+static void __meminit remove_pmd_mapping(pmd_t *pmd_base, unsigned long addr, unsigned long end,
+                                        bool is_vmemmap, struct vmem_altmap *altmap)
+{
+       unsigned long next;
+       pte_t *pte_base;
+       pmd_t *pmdp, pmd;
+
+       for (; addr < end; addr = next) {
+               next = pmd_addr_end(addr, end);
+               pmdp = pmd_base + pmd_index(addr);
+               pmd = READ_ONCE(*pmdp);
+
+               if (!pmd_present(pmd))
+                       continue;
+
+               if (pmd_leaf(pmd)) {
+                       pmd_clear(pmdp);
+                       if (is_vmemmap)
+                               free_vmemmap_storage(pmd_page(pmd), PMD_SIZE, altmap);
+                       continue;
+               }
+
+               pte_base = (pte_t *)pmd_page_vaddr(pmd);
+               remove_pte_mapping(pte_base, addr, next, is_vmemmap, altmap);
+               free_pte_table(pte_base, pmdp);
+       }
+}
+
+static void __meminit remove_pud_mapping(pud_t *pud_base, unsigned long addr, unsigned long end,
+                                        bool is_vmemmap, struct vmem_altmap *altmap)
+{
+       unsigned long next;
+       pud_t *pudp, pud;
+       pmd_t *pmd_base;
+
+       for (; addr < end; addr = next) {
+               next = pud_addr_end(addr, end);
+               pudp = pud_base + pud_index(addr);
+               pud = READ_ONCE(*pudp);
+
+               if (!pud_present(pud))
+                       continue;
+
+               if (pud_leaf(pud)) {
+                       if (pgtable_l4_enabled) {
+                               pud_clear(pudp);
+                               if (is_vmemmap)
+                                       free_vmemmap_storage(pud_page(pud), PUD_SIZE, altmap);
+                       }
+                       continue;
+               }
+
+               pmd_base = pmd_offset(pudp, 0);
+               remove_pmd_mapping(pmd_base, addr, next, is_vmemmap, altmap);
+
+               if (pgtable_l4_enabled)
+                       free_pmd_table(pmd_base, pudp);
+       }
+}
+
+static void __meminit remove_p4d_mapping(p4d_t *p4d_base, unsigned long addr, unsigned long end,
+                                        bool is_vmemmap, struct vmem_altmap *altmap)
+{
+       unsigned long next;
+       p4d_t *p4dp, p4d;
+       pud_t *pud_base;
+
+       for (; addr < end; addr = next) {
+               next = p4d_addr_end(addr, end);
+               p4dp = p4d_base + p4d_index(addr);
+               p4d = READ_ONCE(*p4dp);
+
+               if (!p4d_present(p4d))
+                       continue;
+
+               if (p4d_leaf(p4d)) {
+                       if (pgtable_l5_enabled) {
+                               p4d_clear(p4dp);
+                               if (is_vmemmap)
+                                       free_vmemmap_storage(p4d_page(p4d), P4D_SIZE, altmap);
+                       }
+                       continue;
+               }
+
+               pud_base = pud_offset(p4dp, 0);
+               remove_pud_mapping(pud_base, addr, next, is_vmemmap, altmap);
+
+               if (pgtable_l5_enabled)
+                       free_pud_table(pud_base, p4dp);
+       }
+}
+
+static void __meminit remove_pgd_mapping(unsigned long va, unsigned long end, bool is_vmemmap,
+                                        struct vmem_altmap *altmap)
+{
+       unsigned long addr, next;
+       p4d_t *p4d_base;
+       pgd_t *pgd;
+
+       for (addr = va; addr < end; addr = next) {
+               next = pgd_addr_end(addr, end);
+               pgd = pgd_offset_k(addr);
+
+               if (!pgd_present(*pgd))
+                       continue;
+
+               if (pgd_leaf(*pgd))
+                       continue;
+
+               p4d_base = p4d_offset(pgd, 0);
+               remove_p4d_mapping(p4d_base, addr, next, is_vmemmap, altmap);
+       }
+
+       flush_tlb_all();
+}
+
+static void __meminit remove_linear_mapping(phys_addr_t start, u64 size)
+{
+       unsigned long va = (unsigned long)__va(start);
+       unsigned long end = (unsigned long)__va(start + size);
+
+       remove_pgd_mapping(va, end, false, NULL);
+}
+
+struct range arch_get_mappable_range(void)
+{
+       struct range mhp_range;
+
+       mhp_range.start = __pa(PAGE_OFFSET);
+       mhp_range.end = __pa(PAGE_END - 1);
+       return mhp_range;
+}
+
+int __ref arch_add_memory(int nid, u64 start, u64 size, struct mhp_params *params)
+{
+       int ret = 0;
+
+       create_linear_mapping_range(start, start + size, 0, &params->pgprot);
+       ret = __add_pages(nid, start >> PAGE_SHIFT, size >> PAGE_SHIFT, params);
+       if (ret) {
+               remove_linear_mapping(start, size);
+               goto out;
+       }
+
+       max_pfn = PFN_UP(start + size);
+       max_low_pfn = max_pfn;
+
+ out:
+       flush_tlb_all();
+       return ret;
+}
+
+void __ref arch_remove_memory(u64 start, u64 size, struct vmem_altmap *altmap)
+{
+       __remove_pages(start >> PAGE_SHIFT, size >> PAGE_SHIFT, altmap);
+       remove_linear_mapping(start, size);
+       flush_tlb_all();
+}
+
+void __ref vmemmap_free(unsigned long start, unsigned long end, struct vmem_altmap *altmap)
+{
+       remove_pgd_mapping(start, end, true, altmap);
+}
+#endif /* CONFIG_MEMORY_HOTPLUG */
-- 
2.40.1

