From: Yulei Zhang <yuleixzh...@tencent.com>

Add hugepage support for dmemfs. We pass PFN_DMEM to
vmf_insert_pfn_pmd(), so a dmem huge pmd is marked with
_PAGE_SPECIAL and _PAGE_DMEM. GUP-fast can then distinguish
dmemfs pages from other page types and handle them correctly.

Signed-off-by: Chen Zhuo <sagazc...@tencent.com>
Signed-off-by: Yulei Zhang <yuleixzh...@tencent.com>
---
 fs/dmemfs/inode.c | 113 +++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 111 insertions(+), 2 deletions(-)
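
Note for reviewers: a minimal sketch (not part of this patch) of the
kind of check the _PAGE_SPECIAL | _PAGE_DMEM marking enables in a
GUP-fast style walker. pmd_special() and pmd_dmem() are assumed helper
names here (a _PAGE_DMEM predicate would come from earlier patches in
this series), so treat this as illustrative only:

        /* Sketch under assumed helpers; not defined by this patch. */
        static inline bool pmd_is_dmem(pmd_t pmd)
        {
                /* Both bits set: a dmemfs huge pmd, not a normal THP. */
                return pmd_special(pmd) && pmd_dmem(pmd);
        }

A hedged userspace sketch of mapping such a file, assuming a 2MB dmem
pagesize and a hypothetical mount at /mnt/dmem; 'len' must be a
multiple of the dmem pagesize or dmemfs_get_unmapped_area() returns
-EINVAL:

        int fd = open("/mnt/dmem/guest0", O_RDWR);  /* hypothetical path */
        void *p = mmap(NULL, 4UL << 20, PROT_READ | PROT_WRITE,
                       MAP_SHARED, fd, 0);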

diff --git a/fs/dmemfs/inode.c b/fs/dmemfs/inode.c
index b3e394f33b42..53a9bf214e0d 100644
--- a/fs/dmemfs/inode.c
+++ b/fs/dmemfs/inode.c
@@ -460,7 +460,7 @@ static int dmemfs_split(struct vm_area_struct *vma, unsigned long addr)
        return 0;
 }
 
-static vm_fault_t dmemfs_fault(struct vm_fault *vmf)
+static vm_fault_t __dmemfs_fault(struct vm_fault *vmf)
 {
        struct vm_area_struct *vma = vmf->vma;
        struct inode *inode = file_inode(vma->vm_file);
@@ -488,6 +488,63 @@ static vm_fault_t dmemfs_fault(struct vm_fault *vmf)
        return ret;
 }
 
+static vm_fault_t __dmemfs_pmd_fault(struct vm_fault *vmf)
+{
+       struct vm_area_struct *vma = vmf->vma;
+       unsigned long pmd_addr = vmf->address & PMD_MASK;
+       unsigned long page_addr;
+       struct inode *inode = file_inode(vma->vm_file);
+       void *entry;
+       phys_addr_t phys;
+       pfn_t pfn;
+       int ret;
+
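+       /* Huge pmd mappings require the dmem pagesize to cover PMD_SIZE. */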
+       if (dmem_page_size(inode) < PMD_SIZE)
+               return VM_FAULT_FALLBACK;
+
+       WARN_ON(pmd_addr < vma->vm_start ||
+               vma->vm_end < pmd_addr + PMD_SIZE);
+
+       page_addr = vmf->address & ~(dmem_page_size(inode) - 1);
+       entry = radix_get_create_entry(vma, page_addr, inode,
+                                      linear_page_index(vma, page_addr));
+       if (IS_ERR(entry))
+               return (PTR_ERR(entry) == -ENOMEM) ?
+                       VM_FAULT_OOM : VM_FAULT_SIGBUS;
+
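+       /*
+        * Tag the pfn as PFN_DMEM so the huge pmd is installed with
+        * _PAGE_SPECIAL | _PAGE_DMEM and GUP-fast can identify it.
+        */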
+       phys = dmem_addr_to_pfn(inode, dmem_entry_to_addr(inode, entry),
+                               linear_page_index(vma, pmd_addr), PMD_SHIFT);
+       phys <<= PAGE_SHIFT;
+       pfn = phys_to_pfn_t(phys, PFN_DMEM);
+       ret = vmf_insert_pfn_pmd(vmf, pfn, !!(vma->vm_flags & VM_WRITE));
+
+       radix_put_entry();
+       return ret;
+}
+
+static vm_fault_t dmemfs_huge_fault(struct vm_fault *vmf, enum page_entry_size pe_size)
+{
+       int ret;
+
+       switch (pe_size) {
+       case PE_SIZE_PTE:
+               ret = __dmemfs_fault(vmf);
+               break;
+       case PE_SIZE_PMD:
+               ret = __dmemfs_pmd_fault(vmf);
+               break;
+       default:
+               ret = VM_FAULT_SIGBUS;
+       }
+
+       return ret;
+}
+
+static vm_fault_t dmemfs_fault(struct vm_fault *vmf)
+{
+       return dmemfs_huge_fault(vmf, PE_SIZE_PTE);
+}
+
 static unsigned long dmemfs_pagesize(struct vm_area_struct *vma)
 {
        return dmem_page_size(file_inode(vma->vm_file));
@@ -498,6 +555,7 @@ static const struct vm_operations_struct dmemfs_vm_ops = {
        .fault = dmemfs_fault,
        .pagesize = dmemfs_pagesize,
        .access = dmemfs_access_dmem,
+       .huge_fault = dmemfs_huge_fault,
 };
 
 int dmemfs_file_mmap(struct file *file, struct vm_area_struct *vma)
@@ -510,15 +568,66 @@ int dmemfs_file_mmap(struct file *file, struct vm_area_struct *vma)
        if (!(vma->vm_flags & VM_SHARED))
                return -EINVAL;
 
-       vma->vm_flags |= VM_PFNMAP | VM_DMEM | VM_IO;
+       vma->vm_flags |= VM_PFNMAP | VM_DONTCOPY | VM_DMEM | VM_IO;
+
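+       /* Advertise huge page backing when the dmem pagesize is above 4K. */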
+       if (dmem_page_size(inode) != PAGE_SIZE)
+               vma->vm_flags |= VM_HUGEPAGE;
 
        file_accessed(file);
        vma->vm_ops = &dmemfs_vm_ops;
        return 0;
 }
 
+/*
+ * If the area returned by mm->get_unmapped_area() is one dmem
+ * pagesize larger than 'len', the returned address can be rounded
+ * up to a dmem pagesize boundary while still leaving room for the
+ * mapping, satisfying the alignment requirement.
+ */
+static unsigned long
+dmemfs_get_unmapped_area(struct file *file, unsigned long addr,
+                        unsigned long len, unsigned long pgoff,
+                        unsigned long flags)
+{
+       unsigned long len_pad;
+       unsigned long off = pgoff << PAGE_SHIFT;
+       unsigned long align;
+
+       align = dmem_page_size(file_inode(file));
+
+       /* Fault fallback is not supported for pmd or pud pagesizes. */
+       if (len & (align - 1))
+               return -EINVAL;
+       if (len > TASK_SIZE)
+               return -ENOMEM;
+
+       if (flags & MAP_FIXED) {
+               if (addr & (align - 1))
+                       return -EINVAL;
+               return addr;
+       }
+
+       /*
+        * Pad an extra 'align' worth of space onto 'len': when the dmem
+        * pagesize is larger than 4K, we need an unmapped area large
+        * enough to hold a range aligned to the dmemfs pagesize.
+        */
+       len_pad = (align == PAGE_SIZE) ? len : len + align;
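+       /*
+        * e.g. a 2MB dmem pagesize with len = 4MB gives len_pad = 6MB;
+        * any 6MB unmapped area contains a 2MB-aligned 4MB range.
+        */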
+
+       /* Bail out if the padding overflows 'len' or 'off'. */
+       if (len_pad < len || (off + len_pad) < off)
+               return -EINVAL;
+
+       addr = current->mm->get_unmapped_area(file, addr, len_pad,
+                                             pgoff, flags);
+
+       /* Round 'addr' up to the next dmem pagesize boundary. */
+       return IS_ERR_VALUE(addr) ? addr : round_up(addr, align);
+}
+
 static const struct file_operations dmemfs_file_operations = {
        .mmap = dmemfs_file_mmap,
+       .get_unmapped_area = dmemfs_get_unmapped_area,
 };
 
 static int dmemfs_parse_param(struct fs_context *fc, struct fs_parameter *param)
-- 
2.28.0
