Hi all,

Today's linux-next merge of the xfs tree got a conflict in
fs/xfs/xfs_file.c between commits 99733fa372ea
("xfs_file_aio_write_checks: switch to iocb/iov_iter") and 3309dd04cbcd
("switch generic_write_checks() to iocb and iter") from Linus' tree and
commits b9d59846f737 ("xfs: DIO write completion size updates race"),
40c63fbc55a9 ("xfs: direct IO EOF zeroing needs to drain AIO") and
0cefb29e6a63 ("xfs: using generic_file_direct_write() is unnecessary")
from the xfs tree.

I fixed it up (I have no idea if this is right - see below) and can
carry the fix as necessary (no action is required).

-- 
Cheers,
Stephen Rothwell                    s...@canb.auug.org.au

diff --cc fs/xfs/xfs_file.c
index 1f12ad0a8585,3a5d305e60c9..000000000000
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@@ -544,22 -545,21 +544,22 @@@ xfs_zero_eof
   */
  STATIC ssize_t
  xfs_file_aio_write_checks(
 -      struct file             *file,
 -      loff_t                  *pos,
 -      size_t                  *count,
 +      struct kiocb            *iocb,
 +      struct iov_iter         *from,
        int                     *iolock)
  {
 +      struct file             *file = iocb->ki_filp;
        struct inode            *inode = file->f_mapping->host;
        struct xfs_inode        *ip = XFS_I(inode);
 -      int                     error = 0;
 +      ssize_t                 error = 0;
 +      size_t                  count = iov_iter_count(from);
  
  restart:
 -      error = generic_write_checks(file, pos, count, S_ISBLK(inode->i_mode));
 -      if (error)
 +      error = generic_write_checks(iocb, from);
 +      if (error <= 0)
                return error;
  
-       error = xfs_break_layouts(inode, iolock);
+       error = xfs_break_layouts(inode, iolock, true);
        if (error)
                return error;
  
@@@ -569,21 -569,41 +569,42 @@@
         * write.  If zeroing is needed and we are currently holding the
         * iolock shared, we need to update it to exclusive which implies
         * having to redo all checks before.
+        *
+        * We need to serialise against EOF updates that occur in IO
+        * completions here. We want to make sure that nobody is changing the
+        * size while we do this check until we have placed an IO barrier (i.e.
+        * hold the XFS_IOLOCK_EXCL) that prevents new IO from being dispatched.
+        * The spinlock effectively forms a memory barrier once we have the
+        * XFS_IOLOCK_EXCL so we are guaranteed to see the latest EOF value
+        * and hence be able to correctly determine if we need to run zeroing.
         */
+       spin_lock(&ip->i_flags_lock);
 -      if (*pos > i_size_read(inode)) {
 +      if (iocb->ki_pos > i_size_read(inode)) {
                bool    zero = false;
  
+               spin_unlock(&ip->i_flags_lock);
                if (*iolock == XFS_IOLOCK_SHARED) {
                        xfs_rw_iunlock(ip, *iolock);
                        *iolock = XFS_IOLOCK_EXCL;
                        xfs_rw_ilock(ip, *iolock);
 +                      iov_iter_reexpand(from, count);
+ 
+                       /*
+                        * We now have an IO submission barrier in place, but
+                        * AIO can do EOF updates during IO completion and hence
+                        * we now need to wait for all of them to drain. Non-AIO
+                        * DIO will have drained before we are given the
+                        * XFS_IOLOCK_EXCL, and so for most cases this wait is a
+                        * no-op.
+                        */
+                       inode_dio_wait(inode);
                        goto restart;
                }
 -              error = xfs_zero_eof(ip, *pos, i_size_read(inode), &zero);
 +              error = xfs_zero_eof(ip, iocb->ki_pos, i_size_read(inode), 
&zero);
                if (error)
                        return error;
-       }
+       } else
+               spin_unlock(&ip->i_flags_lock);
  
        /*
         * Updating the timestamps will grab the ilock again from
@@@ -680,11 -702,11 +703,12 @@@ xfs_file_dio_aio_write
                xfs_rw_ilock(ip, iolock);
        }
  
 -      ret = xfs_file_aio_write_checks(file, &pos, &count, &iolock);
 +      ret = xfs_file_aio_write_checks(iocb, from, &iolock);
        if (ret)
                goto out;
 -      iov_iter_truncate(from, count);
 +      count = iov_iter_count(from);
 +      pos = iocb->ki_pos;
+       end = pos + count - 1;
  
        if (mapping->nrpages) {
                ret = filemap_write_and_wait_range(VFS_I(ip)->i_mapping,
@@@ -1385,8 -1449,59 +1449,57 @@@ xfs_file_llseek
        }
  }
  
+ /*
+  * Locking for serialisation of IO during page faults. This results in a lock
+  * ordering of:
+  *
+  * mmap_sem (MM)
+  *   i_mmap_lock (XFS - truncate serialisation)
+  *     page_lock (MM)
+  *       i_lock (XFS - extent map serialisation)
+  */
+ STATIC int
+ xfs_filemap_fault(
+       struct vm_area_struct   *vma,
+       struct vm_fault         *vmf)
+ {
+       struct xfs_inode        *ip = XFS_I(vma->vm_file->f_mapping->host);
+       int                     error;
+ 
+       trace_xfs_filemap_fault(ip);
+ 
+       xfs_ilock(ip, XFS_MMAPLOCK_SHARED);
+       error = filemap_fault(vma, vmf);
+       xfs_iunlock(ip, XFS_MMAPLOCK_SHARED);
+ 
+       return error;
+ }
+ 
+ /*
+  * mmap()d file has taken write protection fault and is being made writable. 
We
+  * can set the page state up correctly for a writable page, which means we can
+  * do correct delalloc accounting (ENOSPC checking!) and unwritten extent
+  * mapping.
+  */
+ STATIC int
+ xfs_filemap_page_mkwrite(
+       struct vm_area_struct   *vma,
+       struct vm_fault         *vmf)
+ {
+       struct xfs_inode        *ip = XFS_I(vma->vm_file->f_mapping->host);
+       int                     error;
+ 
+       trace_xfs_filemap_page_mkwrite(ip);
+ 
+       xfs_ilock(ip, XFS_MMAPLOCK_SHARED);
+       error = block_page_mkwrite(vma, vmf, xfs_get_blocks);
+       xfs_iunlock(ip, XFS_MMAPLOCK_SHARED);
+ 
+       return error;
+ }
+ 
  const struct file_operations xfs_file_operations = {
        .llseek         = xfs_file_llseek,
 -      .read           = new_sync_read,
 -      .write          = new_sync_write,
        .read_iter      = xfs_file_read_iter,
        .write_iter     = xfs_file_write_iter,
        .splice_read    = xfs_file_splice_read,

Attachment: pgpYu2z3Bdljl.pgp
Description: OpenPGP digital signature

Reply via email to