Currently DAX folio/page reference counts are managed differently from
those of normal pages. To allow them to be managed the same way as normal
pages, introduce dax_insert_pfn_pmd(). This will map the entire PMD-sized
folio and take references as it would for a normally mapped page.

This is distinct from the current mechanism, vmf_insert_pfn_pmd, which
simply inserts a special devmap PMD entry into the page table without
holding a reference to the page for the mapping.

Signed-off-by: Alistair Popple <apop...@nvidia.com>
---
 include/linux/huge_mm.h |  1 +-
 mm/huge_memory.c        | 70 ++++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 71 insertions(+)

diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
index b98a3cc..9207d8e 100644
--- a/include/linux/huge_mm.h
+++ b/include/linux/huge_mm.h
@@ -39,6 +39,7 @@ int change_huge_pmd(struct mmu_gather *tlb, struct 
vm_area_struct *vma,
 
 vm_fault_t vmf_insert_pfn_pmd(struct vm_fault *vmf, pfn_t pfn, bool write);
 vm_fault_t vmf_insert_pfn_pud(struct vm_fault *vmf, pfn_t pfn, bool write);
+vm_fault_t dax_insert_pfn_pmd(struct vm_fault *vmf, pfn_t pfn, bool write);
 vm_fault_t dax_insert_pfn_pud(struct vm_fault *vmf, pfn_t pfn, bool write);
 
 enum transparent_hugepage_flag {
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index e1f053e..a9874ac 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -1202,6 +1202,76 @@ vm_fault_t vmf_insert_pfn_pmd(struct vm_fault *vmf, 
pfn_t pfn, bool write)
 }
 EXPORT_SYMBOL_GPL(vmf_insert_pfn_pmd);
 
+/**
+ * dax_insert_pfn_pmd - map a PMD-sized DAX folio, taking a reference on it
+ * @vmf: fault descriptor; supplies the VMA, the faulting address and the PMD
+ * @pfn: pfn to install in the PMD entry
+ * @write: if true, the entry is marked young and dirty, and made writable
+ *         when maybe_pmd_mkwrite() allows it for this VMA
+ *
+ * When the PMD is empty, a folio reference is taken, the file rmap is
+ * updated and the mm's file counter is raised by HPAGE_PMD_NR before the
+ * entry is installed. When the PMD is already populated no new reference
+ * is taken; a write fault on a matching pfn only upgrades the access flags.
+ *
+ * Return: VM_FAULT_SIGBUS if the PMD-aligned address lies outside the VMA,
+ * VM_FAULT_OOM if the page table to deposit cannot be allocated, otherwise
+ * VM_FAULT_NOPAGE.
+ */
+vm_fault_t dax_insert_pfn_pmd(struct vm_fault *vmf, pfn_t pfn, bool write)
+{
+       struct vm_area_struct *vma = vmf->vma;
+       unsigned long addr = vmf->address & PMD_MASK;
+       pmd_t *pmd = vmf->pmd;
+       struct mm_struct *mm = vma->vm_mm;
+       pgprot_t pgprot = vma->vm_page_prot;
+       pmd_t entry;
+       spinlock_t *ptl;
+       pgtable_t pgtable = NULL;
+       struct folio *folio;
+       struct page *page;
+
+       if (addr < vma->vm_start || addr >= vma->vm_end)
+               return VM_FAULT_SIGBUS;
+
+       /* Allocate before taking the PMD lock; freed below if left unused. */
+       if (arch_needs_pgtable_deposit()) {
+               pgtable = pte_alloc_one(mm);
+               if (!pgtable)
+                       return VM_FAULT_OOM;
+       }
+
+       /*
+        * track_pfn_insert() is handed a writable pgprot pointer, so give it
+        * a local copy: the VMA's own vm_page_prot must not be modified from
+        * the fault path.
+        */
+       track_pfn_insert(vma, &pgprot, pfn);
+
+       ptl = pmd_lock(mm, pmd);
+       if (!pmd_none(*pmd)) {
+               /* Already mapped: at most upgrade the existing entry. */
+               if (write) {
+                       if (pmd_pfn(*pmd) != pfn_t_to_pfn(pfn)) {
+                               WARN_ON_ONCE(!is_huge_zero_pmd(*pmd));
+                               goto out_unlock;
+                       }
+                       entry = pmd_mkyoung(*pmd);
+                       entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma);
+                       if (pmdp_set_access_flags(vma, addr, pmd, entry, 1))
+                               update_mmu_cache_pmd(vma, addr, pmd);
+               }
+
+               goto out_unlock;
+       }
+
+       entry = pmd_mkhuge(pfn_t_pmd(pfn, pgprot));
+       if (pfn_t_devmap(pfn))
+               entry = pmd_mkdevmap(entry);
+       if (write) {
+               entry = pmd_mkyoung(pmd_mkdirty(entry));
+               entry = maybe_pmd_mkwrite(entry, vma);
+       }
+
+       if (pgtable) {
+               pgtable_trans_huge_deposit(mm, pmd, pgtable);
+               mm_inc_nr_ptes(mm);
+               /* Ownership passed to the deposit; do not free it below. */
+               pgtable = NULL;
+       }
+
+       /* Account the new mapping like a normally mapped file folio. */
+       page = pfn_t_to_page(pfn);
+       folio = page_folio(page);
+       folio_get(folio);
+       folio_add_file_rmap_pmd(folio, page, vma);
+       add_mm_counter(mm, mm_counter_file(folio), HPAGE_PMD_NR);
+       set_pmd_at(mm, addr, pmd, entry);
+       update_mmu_cache_pmd(vma, addr, pmd);
+
+out_unlock:
+       spin_unlock(ptl);
+       if (pgtable)
+               pte_free(mm, pgtable);
+
+       return VM_FAULT_NOPAGE;
+}
+EXPORT_SYMBOL_GPL(dax_insert_pfn_pmd);
+
 #ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD
 static pud_t maybe_pud_mkwrite(pud_t pud, struct vm_area_struct *vma)
 {
-- 
git-series 0.9.1

Reply via email to