From: Matthew Wilcox <wi...@linux.intel.com>

When we handle a write fault on a DAX mapping, we currently insert a
read-only mapping and then take a second page fault to convert it to a
writable mapping.  That second fault is necessary when we are covering
a hole with the read-only zero page, but it is pure overhead when a
data block is already allocated.

Use the recently added vmf_insert_pfn_prot() to insert a writable
mapping straight away, even though the VMA's default page protection
is read-only.
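
The conversion relies on the protection_map convention: the __P*
("copy") entries that DAX VMAs get by default are not hardware-writable,
while the matching __S* (shared) entries differ from them only in the
bits that enable writes (just _PAGE_RW on x86).  As a simplified sketch
of the idea (not part of the patch; dax_writable_prot() is a
hypothetical name used only for illustration, and it assumes a
VM_READ|VM_WRITE|VM_EXEC mapping):

	/*
	 * Illustrative sketch: XOR out the "copy" protection and XOR in
	 * the shared one; the bits that differ are exactly the
	 * write-enable bits, so the result is the writable protection
	 * for the same read/exec combination.
	 */
	static pgprot_t dax_writable_prot(pgprot_t prot)
	{
		return __pgprot(pgprot_val(prot) ^
				pgprot_val(__P111) ^
				pgprot_val(__S111));
	}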

Signed-off-by: Matthew Wilcox <wi...@linux.intel.com>
---
 fs/dax.c | 73 ++++++++++++++++++++++++++++++++++++++++++++++------------------
 1 file changed, 53 insertions(+), 20 deletions(-)

diff --git a/fs/dax.c b/fs/dax.c
index 206650f..3f6138d 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -519,9 +519,44 @@ int dax_writeback_mapping_range(struct address_space *mapping, loff_t start,
 }
 EXPORT_SYMBOL_GPL(dax_writeback_mapping_range);
 
+/*
+ * The default page protections for DAX VMAs are set to "copy" so that
+ * we get notifications when zero pages are written to.  This function
+ * is called when we're inserting a mapping to a data page.  If this is
+ * a write fault, we've already done all the necessary accounting and
+ * it's pointless to insert this translation entry read-only.  Convert
+ * the pgprot to be writable.
+ *
+ * While this is not the most elegant code, the compiler can see that (on
+ * any sane architecture) all four arms of the conditional are the same.
+ */
+static pgprot_t dax_pgprot(struct vm_area_struct *vma, bool write)
+{
+       pgprot_t pgprot = vma->vm_page_prot;
+       if (!write)
+               return pgprot;
+       if ((vma->vm_flags & (VM_READ|VM_EXEC)) == (VM_READ|VM_EXEC))
+               return __pgprot(pgprot_val(pgprot) ^
+                               pgprot_val(__P111) ^
+                               pgprot_val(__S111));
+       else if ((vma->vm_flags & (VM_READ|VM_EXEC)) == VM_READ)
+               return __pgprot(pgprot_val(pgprot) ^
+                               pgprot_val(__P110) ^
+                               pgprot_val(__S110));
+       else if ((vma->vm_flags & (VM_READ|VM_EXEC)) == VM_EXEC)
+               return __pgprot(pgprot_val(pgprot) ^
+                               pgprot_val(__P011) ^
+                               pgprot_val(__S011));
+       else
+               return __pgprot(pgprot_val(pgprot) ^
+                               pgprot_val(__P010) ^
+                               pgprot_val(__S010));
+}
+
 static int dax_insert_mapping(struct inode *inode, struct buffer_head *bh,
                        struct vm_area_struct *vma, struct vm_fault *vmf)
 {
+       bool write = vmf->flags & FAULT_FLAG_WRITE;
        unsigned long vaddr = (unsigned long)vmf->virtual_address;
        struct address_space *mapping = inode->i_mapping;
        struct block_device *bdev = bh->b_bdev;
@@ -530,7 +565,7 @@ static int dax_insert_mapping(struct inode *inode, struct buffer_head *bh,
                .size = bh->b_size,
        };
        pgoff_t size;
-       int error;
+       int result;
 
        i_mmap_lock_read(mapping);
 
@@ -542,15 +577,11 @@ static int dax_insert_mapping(struct inode *inode, struct buffer_head *bh,
         * allocated past the end of the file.
         */
        size = (i_size_read(inode) + PAGE_SIZE - 1) >> PAGE_SHIFT;
-       if (unlikely(vmf->pgoff >= size)) {
-               error = -EIO;
-               goto out;
-       }
+       if (unlikely(vmf->pgoff >= size))
+               goto sigbus;
 
-       if (dax_map_atomic(bdev, &dax) < 0) {
-               error = PTR_ERR(dax.addr);
-               goto out;
-       }
+       if (dax_map_atomic(bdev, &dax) < 0)
+               goto sigbus;
 
        if (buffer_unwritten(bh) || buffer_new(bh)) {
                clear_pmem(dax.addr, PAGE_SIZE);
@@ -558,17 +589,19 @@ static int dax_insert_mapping(struct inode *inode, struct buffer_head *bh,
        }
        dax_unmap_atomic(bdev, &dax);
 
-       error = dax_radix_entry(mapping, vmf->pgoff, dax.sector, false,
-                       vmf->flags & FAULT_FLAG_WRITE);
-       if (error)
-               goto out;
+       if (dax_radix_entry(mapping, vmf->pgoff, dax.sector, false, write))
+               goto sigbus;
 
-       error = vm_insert_mixed(vma, vaddr, dax.pfn);
+       result = vmf_insert_pfn_prot(vma, vaddr, dax.pfn,
+                                       dax_pgprot(vma, write));
 
  out:
        i_mmap_unlock_read(mapping);
+       return result;
 
-       return error;
+ sigbus:
+       result = VM_FAULT_SIGBUS;
+       goto out;
 }
 
 /**
@@ -599,7 +632,7 @@ int __dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
        unsigned blkbits = inode->i_blkbits;
        sector_t block;
        pgoff_t size;
-       int error;
+       int result, error;
        int major = 0;
 
        size = (i_size_read(inode) + PAGE_SIZE - 1) >> PAGE_SHIFT;
@@ -701,19 +734,19 @@ int __dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
         * indicate what the callback should do via the uptodate variable, same
         * as for normal BH based IO completions.
         */
-       error = dax_insert_mapping(inode, &bh, vma, vmf);
+       result = dax_insert_mapping(inode, &bh, vma, vmf);
        if (buffer_unwritten(&bh)) {
                if (complete_unwritten)
-                       complete_unwritten(&bh, !error);
+                       complete_unwritten(&bh, !(result & VM_FAULT_ERROR));
                else
                        WARN_ON_ONCE(!(vmf->flags & FAULT_FLAG_WRITE));
        }
+       return result | major;
 
  out:
        if (error == -ENOMEM)
                return VM_FAULT_OOM | major;
-       /* -EBUSY is fine, somebody else faulted on the same PTE */
-       if ((error < 0) && (error != -EBUSY))
+       if (error < 0)
                return VM_FAULT_SIGBUS | major;
        return VM_FAULT_NOPAGE | major;
 
-- 
2.7.0.rc3
