When CONFIG_FS_DAX_PMD is set, DAX supports mmap() using the PMD page
size.  This feature relies on both the mmap virtual address and the FS
block data (i.e. physical address) being aligned to the PMD page
size.  Users can use mkfs options to make the FS align its block
allocations.  However, aligning the mmap() address requires application
changes to mmap() calls, such as:

 -  /* let the kernel assign a mmap addr */
 -  mptr = mmap(NULL, fsize, PROT_READ|PROT_WRITE, FLAGS, fd, 0);

 +  /* 1. obtain a PMD-aligned virtual address */
 +  ret = posix_memalign(&mptr, PMD_SIZE, fsize);
 +  if (!ret)
 +    free(mptr);  /* 2. release the virt addr */
 +
 +  /* 3. then pass the PMD-aligned virt addr to mmap() */
 +  mptr = mmap(mptr, fsize, PROT_READ|PROT_WRITE, FLAGS, fd, 0);

These changes add an unnecessary dependency on DAX and the PMD page
size to application code.  The kernel should assign a mmap address
appropriate for the operation.

Change arch_get_unmapped_area() and arch_get_unmapped_area_topdown()
to request PMD_SIZE alignment when the request is for a DAX file and
its mapping range is large enough to use a PMD page.

Signed-off-by: Toshi Kani <[email protected]>
Cc: Thomas Gleixner <[email protected]>
Cc: Ingo Molnar <[email protected]>
Cc: H. Peter Anvin <[email protected]>
Cc: Borislav Petkov <[email protected]>
Cc: Dan Williams <[email protected]>
Cc: Matthew Wilcox <[email protected]>
Cc: Kirill A. Shutemov <[email protected]>
---
 arch/x86/kernel/sys_x86_64.c |   14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/arch/x86/kernel/sys_x86_64.c b/arch/x86/kernel/sys_x86_64.c
index 10e0272..a294c66 100644
--- a/arch/x86/kernel/sys_x86_64.c
+++ b/arch/x86/kernel/sys_x86_64.c
@@ -157,6 +157,13 @@ arch_get_unmapped_area(struct file *filp, unsigned long addr,
                info.align_mask = get_align_mask();
                info.align_offset += get_align_bits();
        }
+       if (filp && IS_ENABLED(CONFIG_FS_DAX_PMD) && IS_DAX(file_inode(filp))) {
+               unsigned long off_end = info.align_offset + len;
+               unsigned long off_pmd = round_up(info.align_offset, PMD_SIZE);
+
+               if ((off_end > off_pmd) && ((off_end - off_pmd) >= PMD_SIZE))
+                       info.align_mask |= (PMD_SIZE - 1);
+       }
        return vm_unmapped_area(&info);
 }
 
@@ -200,6 +207,13 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
                info.align_mask = get_align_mask();
                info.align_offset += get_align_bits();
        }
+       if (filp && IS_ENABLED(CONFIG_FS_DAX_PMD) && IS_DAX(file_inode(filp))) {
+               unsigned long off_end = info.align_offset + len;
+               unsigned long off_pmd = round_up(info.align_offset, PMD_SIZE);
+
+               if ((off_end > off_pmd) && ((off_end - off_pmd) >= PMD_SIZE))
+                       info.align_mask |= (PMD_SIZE - 1);
+       }
        addr = vm_unmapped_area(&info);
        if (!(addr & ~PAGE_MASK))
                return addr;

Reply via email to