Vishal Verma <vishal.l.ve...@intel.com> writes:

> dax_do_io (called for read() or write() for a dax file system) may fail
> in the presence of bad blocks or media errors. Since we expect that a
> write should clear media errors on nvdimms, make dax_do_io fall back to
> the direct_IO path, which will send down a bio to the driver, which can
> then attempt to clear the error.
[snip]
> + if (IS_DAX(inode)) {
> + ret = dax_do_io(iocb, inode, iter, offset, blkdev_get_block,
> NULL, DIO_SKIP_DIO_COUNT);
> - return __blockdev_direct_IO(iocb, inode, I_BDEV(inode), iter, offset,
> + if (ret == -EIO && (iov_iter_rw(iter) == WRITE))
> + ret_saved = ret;
> + else
> + return ret;
> + }
> +
> + ret = __blockdev_direct_IO(iocb, inode, I_BDEV(inode), iter, offset,
> blkdev_get_block, NULL, NULL,
> DIO_SKIP_DIO_COUNT);
> + if (ret < 0 && ret_saved)
> + return ret_saved;
> +

Hmm, did you just break async DIO? I think you did! :)
__blockdev_direct_IO can return -EIOCBQUEUED, and you've now turned that
into -EIO. Really, I don't see a reason to save that first -EIO. The
same applies to all instances in this patch.

Cheers,
Jeff

> + return ret;
> }
>
> int __sync_blockdev(struct block_device *bdev, int wait)
> diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c
> index 824f249..64792c6 100644
> --- a/fs/ext2/inode.c
> +++ b/fs/ext2/inode.c
> @@ -859,14 +859,22 @@ ext2_direct_IO(struct kiocb *iocb, struct iov_iter
> *iter, loff_t offset)
> struct address_space *mapping = file->f_mapping;
> struct inode *inode = mapping->host;
> size_t count = iov_iter_count(iter);
> - ssize_t ret;
> + ssize_t ret, ret_saved = 0;
>
> - if (IS_DAX(inode))
> - ret = dax_do_io(iocb, inode, iter, offset, ext2_get_block, NULL,
> - DIO_LOCKING);
> - else
> - ret = blockdev_direct_IO(iocb, inode, iter, offset,
> - ext2_get_block);
> + if (IS_DAX(inode)) {
> + ret = dax_do_io(iocb, inode, iter, offset, ext2_get_block,
> + NULL, DIO_LOCKING | DIO_SKIP_HOLES);
> + if (ret == -EIO && iov_iter_rw(iter) == WRITE)
> + ret_saved = ret;
> + else
> + goto out;
> + }
> +
> + ret = blockdev_direct_IO(iocb, inode, iter, offset, ext2_get_block);
> + if (ret < 0 && ret_saved)
> + ret = ret_saved;
> +
> + out:
> if (ret < 0 && iov_iter_rw(iter) == WRITE)
> ext2_write_failed(mapping, offset + count);
> return ret;
> diff --git a/fs/ext4/indirect.c b/fs/ext4/indirect.c
> index 3027fa6..798f341 100644
> --- a/fs/ext4/indirect.c
> +++ b/fs/ext4/indirect.c
> @@ -716,14 +716,22 @@ retry:
> NULL, NULL, 0);
> inode_dio_end(inode);
> } else {
> + ssize_t ret_saved = 0;
> +
> locked:
> - if (IS_DAX(inode))
> + if (IS_DAX(inode)) {
> ret = dax_do_io(iocb, inode, iter, offset,
> ext4_dio_get_block, NULL, DIO_LOCKING);
> - else
> - ret = blockdev_direct_IO(iocb, inode, iter, offset,
> - ext4_dio_get_block);
> -
> + if (ret == -EIO && iov_iter_rw(iter) == WRITE)
> + ret_saved = ret;
> + else
> + goto skip_dio;
> + }
> + ret = blockdev_direct_IO(iocb, inode, iter, offset,
> + ext4_get_block);
> + if (ret < 0 && ret_saved)
> + ret = ret_saved;
> +skip_dio:
> if (unlikely(iov_iter_rw(iter) == WRITE && ret < 0)) {
> loff_t isize = i_size_read(inode);
> loff_t end = offset + count;
> diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
> index dab84a2..27f07c2 100644
> --- a/fs/ext4/inode.c
> +++ b/fs/ext4/inode.c
> @@ -3341,7 +3341,7 @@ static ssize_t ext4_ext_direct_IO(struct kiocb *iocb,
> struct iov_iter *iter,
> {
> struct file *file = iocb->ki_filp;
> struct inode *inode = file->f_mapping->host;
> - ssize_t ret;
> + ssize_t ret, ret_saved = 0;
> size_t count = iov_iter_count(iter);
> int overwrite = 0;
> get_block_t *get_block_func = NULL;
> @@ -3401,15 +3401,22 @@ static ssize_t ext4_ext_direct_IO(struct kiocb *iocb,
> struct iov_iter *iter,
> #ifdef CONFIG_EXT4_FS_ENCRYPTION
> BUG_ON(ext4_encrypted_inode(inode) && S_ISREG(inode->i_mode));
> #endif
> - if (IS_DAX(inode))
> + if (IS_DAX(inode)) {
> ret = dax_do_io(iocb, inode, iter, offset, get_block_func,
> ext4_end_io_dio, dio_flags);
> - else
> - ret = __blockdev_direct_IO(iocb, inode,
> - inode->i_sb->s_bdev, iter, offset,
> - get_block_func,
> - ext4_end_io_dio, NULL, dio_flags);
> + if (ret == -EIO && iov_iter_rw(iter) == WRITE)
> + ret_saved = ret;
> + else
> + goto skip_dio;
> + }
>
> + ret = __blockdev_direct_IO(iocb, inode,
> + inode->i_sb->s_bdev, iter, offset,
> + get_block_func,
> + ext4_end_io_dio, NULL, dio_flags);
> + if (ret < 0 && ret_saved)
> + ret = ret_saved;
> + skip_dio:
> if (ret > 0 && !overwrite && ext4_test_inode_state(inode,
> EXT4_STATE_DIO_UNWRITTEN)) {
> int err;
> diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
> index d445a64..7cfcf86 100644
> --- a/fs/xfs/xfs_aops.c
> +++ b/fs/xfs/xfs_aops.c
> @@ -1413,6 +1413,7 @@ xfs_vm_direct_IO(
> dio_iodone_t *endio = NULL;
> int flags = 0;
> struct block_device *bdev;
> + ssize_t ret, ret_saved = 0;
>
> if (iov_iter_rw(iter) == WRITE) {
> endio = xfs_end_io_direct_write;
> @@ -1420,13 +1421,22 @@ xfs_vm_direct_IO(
> }
>
> if (IS_DAX(inode)) {
> - return dax_do_io(iocb, inode, iter, offset,
> + ret = dax_do_io(iocb, inode, iter, offset,
> xfs_get_blocks_direct, endio, 0);
> + if (ret == -EIO && iov_iter_rw(iter) == WRITE)
> + ret_saved = ret;
> + else
> + return ret;
> }
>
> bdev = xfs_find_bdev_for_inode(inode);
> - return __blockdev_direct_IO(iocb, inode, bdev, iter, offset,
> + ret = __blockdev_direct_IO(iocb, inode, bdev, iter, offset,
> xfs_get_blocks_direct, endio, NULL, flags);
> +
> + if (ret < 0 && ret_saved)
> + ret = ret_saved;
> +
> + return ret;
> }
>
> /*