At page fault time, check i_private, which is non-NULL while a fallocate
hole punch is in progress.  If the fault falls within the hole being
punched, wait for the hole punch operation to complete before proceeding
with the fault.

Signed-off-by: Mike Kravetz <[email protected]>
---
 mm/hugetlb.c | 37 +++++++++++++++++++++++++++++++++++++
 1 file changed, 37 insertions(+)

diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 3c7db92..540d3a79 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -3580,6 +3580,7 @@ int hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma,
        struct page *pagecache_page = NULL;
        struct hstate *h = hstate_vma(vma);
        struct address_space *mapping;
+       struct inode *inode = file_inode(vma->vm_file);
        int need_wait_lock = 0;
 
        address &= huge_page_mask(h);
@@ -3603,6 +3604,42 @@ int hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma,
        idx = vma_hugecache_offset(h, vma, address);
 
        /*
+        * Page faults could race with fallocate hole punch.  If a page
+        * is faulted between unmap and deallocation, it will still remain
+        * in the punched hole.  During hole punch operations, a hugetlb_falloc
+        * structure will be pointed to by i_private.  If this fault is for
+        * a page in a hole being punched, wait for the operation to finish
+        * before proceeding.
+        *
+        * Even with this strategy, it is still possible for a page fault to
+        * race with hole punch.  However, the race window is considerably
+        * smaller.
+        */
+       if (unlikely(inode->i_private)) {
+               struct hugetlb_falloc *hugetlb_falloc;
+
+               spin_lock(&inode->i_lock);
+               hugetlb_falloc = inode->i_private;
+               if (hugetlb_falloc && hugetlb_falloc->waitq &&
+                   idx >= hugetlb_falloc->start &&
+                   idx <= hugetlb_falloc->end) {
+                       wait_queue_head_t *hugetlb_falloc_waitq;
+                       DEFINE_WAIT(hugetlb_fault_wait);
+
+                       hugetlb_falloc_waitq = hugetlb_falloc->waitq;
+                       prepare_to_wait(hugetlb_falloc_waitq,
+                                       &hugetlb_fault_wait,
+                                       TASK_UNINTERRUPTIBLE);
+                       spin_unlock(&inode->i_lock);
+                       schedule();
+
+                       spin_lock(&inode->i_lock);
+                       finish_wait(hugetlb_falloc_waitq, &hugetlb_fault_wait);
+               }
+               spin_unlock(&inode->i_lock);
+       }
+
+       /*
         * Serialize hugepage allocation and instantiation, so that we don't
         * get spurious allocation failures if two CPUs race to instantiate
         * the same page in the page cache.
-- 
2.4.3

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [email protected]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Reply via email to