[f2fs-dev] [PATCH 3/7] vfs: flush and wait for io when setting the immutable flag via FSSETXATTR

2019-06-21 Thread Darrick J. Wong
From: Darrick J. Wong 

When we're using FS_IOC_FSSETXATTR to set the immutable flag on a file,
we need to ensure that userspace can't continue to write the file after
the file becomes immutable.  To make that happen, we have to flush all
the dirty pagecache pages to disk to ensure that we can fail a page
fault on a mmap'd region, wait for pending directio to complete, and
hope the caller locked out any new writes by holding the inode lock.

Signed-off-by: Darrick J. Wong 
---
 fs/btrfs/ioctl.c   |3 +++
 fs/ext4/ioctl.c|3 +++
 fs/f2fs/file.c |3 +++
 fs/xfs/xfs_ioctl.c |   39 +--
 include/linux/fs.h |   37 +
 5 files changed, 79 insertions(+), 6 deletions(-)


diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index f431813b2454..63a9281e6ce0 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -432,6 +432,9 @@ static int btrfs_ioctl_fssetxattr(struct file *file, void 
__user *arg)
 
__btrfs_ioctl_fsgetxattr(binode, _fa);
ret = vfs_ioc_fssetxattr_check(inode, _fa, );
+   if (ret)
+   goto out_unlock;
+   ret = vfs_ioc_fssetxattr_flush_data(inode, );
if (ret)
goto out_unlock;
 
diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c
index a05341b94d98..6037585c1520 100644
--- a/fs/ext4/ioctl.c
+++ b/fs/ext4/ioctl.c
@@ -1115,6 +1115,9 @@ long ext4_ioctl(struct file *filp, unsigned int cmd, 
unsigned long arg)
inode_lock(inode);
ext4_fsgetxattr(inode, _fa);
err = vfs_ioc_fssetxattr_check(inode, _fa, );
+   if (err)
+   goto out;
+   err = vfs_ioc_fssetxattr_flush_data(inode, );
if (err)
goto out;
flags = (ei->i_flags & ~EXT4_FL_XFLAG_VISIBLE) |
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index d3cf4bdb8738..97f4bb36540f 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -2832,6 +2832,9 @@ static int f2fs_ioc_fssetxattr(struct file *filp, 
unsigned long arg)
 
__f2fs_ioc_fsgetxattr(inode, _fa);
err = vfs_ioc_fssetxattr_check(inode, _fa, );
+   if (err)
+   goto out;
+   err = vfs_ioc_fssetxattr_flush_data(inode, );
if (err)
goto out;
flags = (fi->i_flags & ~F2FS_FL_XFLAG_VISIBLE) |
diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c
index b494e7e881e3..88583b3e1e76 100644
--- a/fs/xfs/xfs_ioctl.c
+++ b/fs/xfs/xfs_ioctl.c
@@ -1014,6 +1014,28 @@ xfs_diflags_to_linux(
 #endif
 }
 
+/*
+ * Lock the inode against file io and page faults, then flush all dirty pages
+ * and wait for writeback and direct IO operations to finish.  Returns with
+ * the relevant inode lock flags set in @join_flags.  Caller is responsible for
+ * unlocking even on error return.
+ */
+static int
+xfs_ioctl_setattr_flush(
+   struct xfs_inode*ip,
+   int *join_flags)
+{
+   /* Already locked the inode from IO?  Assume we're done. */
+   if (((*join_flags) & (XFS_IOLOCK_EXCL | XFS_MMAPLOCK_EXCL)) ==
+(XFS_IOLOCK_EXCL | XFS_MMAPLOCK_EXCL))
+   return 0;
+
+   /* Lock and flush all mappings and IO in preparation for flag change */
+   *join_flags = XFS_IOLOCK_EXCL | XFS_MMAPLOCK_EXCL;
+   xfs_ilock(ip, *join_flags);
+   return inode_flush_data(VFS_I(ip));
+}
+
 static int
 xfs_ioctl_setattr_xflags(
struct xfs_trans*tp,
@@ -1099,23 +1121,22 @@ xfs_ioctl_setattr_dax_invalidate(
if (!(fa->fsx_xflags & FS_XFLAG_DAX) && !IS_DAX(inode))
return 0;
 
-   if (S_ISDIR(inode->i_mode))
+   if (!S_ISREG(inode->i_mode))
return 0;
 
-   /* lock, flush and invalidate mapping in preparation for flag change */
-   xfs_ilock(ip, XFS_MMAPLOCK_EXCL | XFS_IOLOCK_EXCL);
-   error = filemap_write_and_wait(inode->i_mapping);
+   error = xfs_ioctl_setattr_flush(ip, join_flags);
if (error)
goto out_unlock;
error = invalidate_inode_pages2(inode->i_mapping);
if (error)
goto out_unlock;
 
-   *join_flags = XFS_MMAPLOCK_EXCL | XFS_IOLOCK_EXCL;
return 0;
 
 out_unlock:
-   xfs_iunlock(ip, XFS_MMAPLOCK_EXCL | XFS_IOLOCK_EXCL);
+   if (*join_flags)
+   xfs_iunlock(ip, *join_flags);
+   *join_flags = 0;
return error;
 
 }
@@ -1337,6 +1358,12 @@ xfs_ioctl_setattr(
if (code)
goto error_free_dquots;
 
+   if (!join_flags && vfs_ioc_fssetxattr_need_flush(VFS_I(ip), fa)) {
+   code = xfs_ioctl_setattr_flush(ip, _flags);
+   if (code)
+   goto error_free_dquots;
+   }
+
tp = xfs_ioctl_setattr_get_trans(ip, join_flags);
if (IS_ERR(tp)) {
code = PTR_ERR(tp);
diff --git a/include/linux/fs.h b/include/linux/fs.h
index ed9a74cf5ef3..b4553d01e254 100644

[f2fs-dev] [PATCH 1/4] vfs: create a generic checking function for FS_IOC_SETFLAGS

2019-06-21 Thread Darrick J. Wong
From: Darrick J. Wong 

Create a generic checking function for the incoming FS_IOC_SETFLAGS flag
values so that we can standardize the implementations that follow ext4's
flag values.

Signed-off-by: Darrick J. Wong 
Reviewed-by: Jan Kara 
---
 fs/btrfs/ioctl.c|   13 +
 fs/efivarfs/file.c  |   18 +-
 fs/ext2/ioctl.c |   16 
 fs/ext4/ioctl.c |   13 +++--
 fs/f2fs/file.c  |7 ---
 fs/gfs2/file.c  |   42 +-
 fs/hfsplus/ioctl.c  |   21 -
 fs/inode.c  |   17 +
 fs/jfs/ioctl.c  |   22 +++---
 fs/nilfs2/ioctl.c   |9 ++---
 fs/ocfs2/ioctl.c|   13 +++--
 fs/reiserfs/ioctl.c |   10 --
 fs/ubifs/ioctl.c|   13 +++--
 include/linux/fs.h  |2 ++
 14 files changed, 108 insertions(+), 108 deletions(-)


diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 6dafa857bbb9..f408aa93b0cf 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -187,7 +187,7 @@ static int btrfs_ioctl_setflags(struct file *file, void 
__user *arg)
struct btrfs_inode *binode = BTRFS_I(inode);
struct btrfs_root *root = binode->root;
struct btrfs_trans_handle *trans;
-   unsigned int fsflags;
+   unsigned int fsflags, old_fsflags;
int ret;
const char *comp = NULL;
u32 binode_flags = binode->flags;
@@ -212,13 +212,10 @@ static int btrfs_ioctl_setflags(struct file *file, void 
__user *arg)
inode_lock(inode);
 
fsflags = btrfs_mask_fsflags_for_type(inode, fsflags);
-   if ((fsflags ^ btrfs_inode_flags_to_fsflags(binode->flags)) &
-   (FS_APPEND_FL | FS_IMMUTABLE_FL)) {
-   if (!capable(CAP_LINUX_IMMUTABLE)) {
-   ret = -EPERM;
-   goto out_unlock;
-   }
-   }
+   old_fsflags = btrfs_inode_flags_to_fsflags(binode->flags);
+   ret = vfs_ioc_setflags_check(inode, old_fsflags, fsflags);
+   if (ret)
+   goto out_unlock;
 
if (fsflags & FS_SYNC_FL)
binode_flags |= BTRFS_INODE_SYNC;
diff --git a/fs/efivarfs/file.c b/fs/efivarfs/file.c
index 8e568428c88b..f4f6c1bec132 100644
--- a/fs/efivarfs/file.c
+++ b/fs/efivarfs/file.c
@@ -110,16 +110,22 @@ static ssize_t efivarfs_file_read(struct file *file, char 
__user *userbuf,
return size;
 }
 
-static int
-efivarfs_ioc_getxflags(struct file *file, void __user *arg)
+static inline unsigned int efivarfs_getflags(struct inode *inode)
 {
-   struct inode *inode = file->f_mapping->host;
unsigned int i_flags;
unsigned int flags = 0;
 
i_flags = inode->i_flags;
if (i_flags & S_IMMUTABLE)
flags |= FS_IMMUTABLE_FL;
+   return flags;
+}
+
+static int
+efivarfs_ioc_getxflags(struct file *file, void __user *arg)
+{
+   struct inode *inode = file->f_mapping->host;
+   unsigned int flags = efivarfs_getflags(inode);
 
if (copy_to_user(arg, , sizeof(flags)))
return -EFAULT;
@@ -132,6 +138,7 @@ efivarfs_ioc_setxflags(struct file *file, void __user *arg)
struct inode *inode = file->f_mapping->host;
unsigned int flags;
unsigned int i_flags = 0;
+   unsigned int oldflags = efivarfs_getflags(inode);
int error;
 
if (!inode_owner_or_capable(inode))
@@ -143,8 +150,9 @@ efivarfs_ioc_setxflags(struct file *file, void __user *arg)
if (flags & ~FS_IMMUTABLE_FL)
return -EOPNOTSUPP;
 
-   if (!capable(CAP_LINUX_IMMUTABLE))
-   return -EPERM;
+   error = vfs_ioc_setflags_check(inode, oldflags, flags);
+   if (error)
+   return error;
 
if (flags & FS_IMMUTABLE_FL)
i_flags |= S_IMMUTABLE;
diff --git a/fs/ext2/ioctl.c b/fs/ext2/ioctl.c
index 0367c0039e68..88b3b9720023 100644
--- a/fs/ext2/ioctl.c
+++ b/fs/ext2/ioctl.c
@@ -60,18 +60,10 @@ long ext2_ioctl(struct file *filp, unsigned int cmd, 
unsigned long arg)
}
oldflags = ei->i_flags;
 
-   /*
-* The IMMUTABLE and APPEND_ONLY flags can only be changed by
-* the relevant capability.
-*
-* This test looks nicer. Thanks to Pauline Middelink
-*/
-   if ((flags ^ oldflags) & (EXT2_APPEND_FL | EXT2_IMMUTABLE_FL)) {
-   if (!capable(CAP_LINUX_IMMUTABLE)) {
-   inode_unlock(inode);
-   ret = -EPERM;
-   goto setflags_out;
-   }
+   ret = vfs_ioc_setflags_check(inode, oldflags, flags);
+   if (ret) {
+   inode_unlock(inode);
+   goto setflags_out;
}
 
flags = flags & EXT2_FL_USER_MODIFIABLE;
diff --git 

[f2fs-dev] [PATCH 5/7] xfs: refactor setflags to use setattr code directly

2019-06-21 Thread Darrick J. Wong
From: Darrick J. Wong 

Refactor the SETFLAGS implementation to use the SETXATTR code directly
instead of partially constructing a struct fsxattr and calling bits and
pieces of the setxattr code.  This reduces code size and becomes
necessary in the next patch to maintain the behavior of allowing
userspace to set immutable on an immutable file so long as nothing
/else/ about the attributes change.

Signed-off-by: Darrick J. Wong 
---
 fs/xfs/xfs_ioctl.c |   40 +++-
 1 file changed, 3 insertions(+), 37 deletions(-)


diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c
index 88583b3e1e76..7b19ba2956ad 100644
--- a/fs/xfs/xfs_ioctl.c
+++ b/fs/xfs/xfs_ioctl.c
@@ -1491,11 +1491,8 @@ xfs_ioc_setxflags(
struct file *filp,
void__user *arg)
 {
-   struct xfs_trans*tp;
struct fsxattr  fa;
-   struct fsxattr  old_fa;
unsigned intflags;
-   int join_flags = 0;
int error;
 
if (copy_from_user(, arg, sizeof(flags)))
@@ -1506,44 +1503,13 @@ xfs_ioc_setxflags(
  FS_SYNC_FL))
return -EOPNOTSUPP;
 
-   fa.fsx_xflags = xfs_merge_ioc_xflags(flags, xfs_ip2xflags(ip));
+   __xfs_ioc_fsgetxattr(ip, false, );
+   fa.fsx_xflags = xfs_merge_ioc_xflags(flags, fa.fsx_xflags);
 
error = mnt_want_write_file(filp);
if (error)
return error;
-
-   /*
-* Changing DAX config may require inode locking for mapping
-* invalidation. These need to be held all the way to transaction commit
-* or cancel time, so need to be passed through to
-* xfs_ioctl_setattr_get_trans() so it can apply them to the join call
-* appropriately.
-*/
-   error = xfs_ioctl_setattr_dax_invalidate(ip, , _flags);
-   if (error)
-   goto out_drop_write;
-
-   tp = xfs_ioctl_setattr_get_trans(ip, join_flags);
-   if (IS_ERR(tp)) {
-   error = PTR_ERR(tp);
-   goto out_drop_write;
-   }
-
-   __xfs_ioc_fsgetxattr(ip, false, _fa);
-   error = vfs_ioc_fssetxattr_check(VFS_I(ip), _fa, );
-   if (error) {
-   xfs_trans_cancel(tp);
-   goto out_drop_write;
-   }
-
-   error = xfs_ioctl_setattr_xflags(tp, ip, );
-   if (error) {
-   xfs_trans_cancel(tp);
-   goto out_drop_write;
-   }
-
-   error = xfs_trans_commit(tp);
-out_drop_write:
+   error = xfs_ioctl_setattr(ip, );
mnt_drop_write_file(filp);
return error;
 }



___
Linux-f2fs-devel mailing list
Linux-f2fs-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel


[f2fs-dev] [PATCH 2/7] vfs: flush and wait for io when setting the immutable flag via SETFLAGS

2019-06-21 Thread Darrick J. Wong
From: Darrick J. Wong 

When we're using FS_IOC_SETFLAGS to set the immutable flag on a file, we
need to ensure that userspace can't continue to write the file after the
file becomes immutable.  To make that happen, we have to flush all the
dirty pagecache pages to disk to ensure that we can fail a page fault on
a mmap'd region, wait for pending directio to complete, and hope the
caller locked out any new writes by holding the inode lock.

Signed-off-by: Darrick J. Wong 
---
 fs/btrfs/ioctl.c   |3 +++
 fs/efivarfs/file.c |5 +
 fs/ext2/ioctl.c|5 +
 fs/ext4/ioctl.c|3 +++
 fs/f2fs/file.c |3 +++
 fs/hfsplus/ioctl.c |3 +++
 fs/nilfs2/ioctl.c  |3 +++
 fs/ocfs2/ioctl.c   |3 +++
 fs/orangefs/file.c |   11 ---
 fs/orangefs/protocol.h |3 +++
 fs/reiserfs/ioctl.c|3 +++
 fs/ubifs/ioctl.c   |3 +++
 include/linux/fs.h |   48 
 13 files changed, 93 insertions(+), 3 deletions(-)


diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 7ddda5b4b6a6..f431813b2454 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -214,6 +214,9 @@ static int btrfs_ioctl_setflags(struct file *file, void 
__user *arg)
fsflags = btrfs_mask_fsflags_for_type(inode, fsflags);
old_fsflags = btrfs_inode_flags_to_fsflags(binode->flags);
ret = vfs_ioc_setflags_check(inode, old_fsflags, fsflags);
+   if (ret)
+   goto out_unlock;
+   ret = vfs_ioc_setflags_flush_data(inode, fsflags);
if (ret)
goto out_unlock;
 
diff --git a/fs/efivarfs/file.c b/fs/efivarfs/file.c
index f4f6c1bec132..845016a67724 100644
--- a/fs/efivarfs/file.c
+++ b/fs/efivarfs/file.c
@@ -163,6 +163,11 @@ efivarfs_ioc_setxflags(struct file *file, void __user *arg)
return error;
 
inode_lock(inode);
+   error = vfs_ioc_setflags_flush_data(inode, flags);
+   if (error) {
+   inode_unlock(inode);
+   return error;
+   }
inode_set_flags(inode, i_flags, S_IMMUTABLE);
inode_unlock(inode);
 
diff --git a/fs/ext2/ioctl.c b/fs/ext2/ioctl.c
index 88b3b9720023..75f75619237c 100644
--- a/fs/ext2/ioctl.c
+++ b/fs/ext2/ioctl.c
@@ -65,6 +65,11 @@ long ext2_ioctl(struct file *filp, unsigned int cmd, 
unsigned long arg)
inode_unlock(inode);
goto setflags_out;
}
+   ret = vfs_ioc_setflags_flush_data(inode, flags);
+   if (ret) {
+   inode_unlock(inode);
+   goto setflags_out;
+   }
 
flags = flags & EXT2_FL_USER_MODIFIABLE;
flags |= oldflags & ~EXT2_FL_USER_MODIFIABLE;
diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c
index 6aa1df1918f7..a05341b94d98 100644
--- a/fs/ext4/ioctl.c
+++ b/fs/ext4/ioctl.c
@@ -290,6 +290,9 @@ static int ext4_ioctl_setflags(struct inode *inode,
jflag = flags & EXT4_JOURNAL_DATA_FL;
 
err = vfs_ioc_setflags_check(inode, oldflags, flags);
+   if (err)
+   goto flags_out;
+   err = vfs_ioc_setflags_flush_data(inode, flags);
if (err)
goto flags_out;
 
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index 183ed1ac60e1..d3cf4bdb8738 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -1681,6 +1681,9 @@ static int __f2fs_ioc_setflags(struct inode *inode, 
unsigned int flags)
oldflags = fi->i_flags;
 
err = vfs_ioc_setflags_check(inode, oldflags, flags);
+   if (err)
+   return err;
+   err = vfs_ioc_setflags_flush_data(inode, flags);
if (err)
return err;
 
diff --git a/fs/hfsplus/ioctl.c b/fs/hfsplus/ioctl.c
index 862a3c9481d7..f8295fa35237 100644
--- a/fs/hfsplus/ioctl.c
+++ b/fs/hfsplus/ioctl.c
@@ -104,6 +104,9 @@ static int hfsplus_ioctl_setflags(struct file *file, int 
__user *user_flags)
inode_lock(inode);
 
err = vfs_ioc_setflags_check(inode, oldflags, flags);
+   if (err)
+   goto out_unlock_inode;
+   err = vfs_ioc_setflags_flush_data(inode, flags);
if (err)
goto out_unlock_inode;
 
diff --git a/fs/nilfs2/ioctl.c b/fs/nilfs2/ioctl.c
index 0632336d2515..a3c200ab9f60 100644
--- a/fs/nilfs2/ioctl.c
+++ b/fs/nilfs2/ioctl.c
@@ -149,6 +149,9 @@ static int nilfs_ioctl_setflags(struct inode *inode, struct 
file *filp,
oldflags = NILFS_I(inode)->i_flags;
 
ret = vfs_ioc_setflags_check(inode, oldflags, flags);
+   if (ret)
+   goto out;
+   ret = vfs_ioc_setflags_flush_data(inode, flags);
if (ret)
goto out;
 
diff --git a/fs/ocfs2/ioctl.c b/fs/ocfs2/ioctl.c
index 467a2faf0305..e91ca0dad3d7 100644
--- a/fs/ocfs2/ioctl.c
+++ b/fs/ocfs2/ioctl.c
@@ -107,6 +107,9 @@ static int ocfs2_set_inode_attr(struct inode *inode, 
unsigned flags,
flags |= oldflags & ~mask;
 

[f2fs-dev] [PATCH 7/7] vfs: don't allow writes to swap files

2019-06-21 Thread Darrick J. Wong
From: Darrick J. Wong 

Don't let userspace write to an active swap file because the kernel
effectively has a long term lease on the storage and things could get
seriously corrupted if we let this happen.

Signed-off-by: Darrick J. Wong 
---
 fs/attr.c |3 +++
 mm/filemap.c  |3 +++
 mm/memory.c   |4 +++-
 mm/mmap.c |2 ++
 mm/swapfile.c |   15 +--
 5 files changed, 24 insertions(+), 3 deletions(-)


diff --git a/fs/attr.c b/fs/attr.c
index 1fcfdcc5b367..42f4d4fb0631 100644
--- a/fs/attr.c
+++ b/fs/attr.c
@@ -236,6 +236,9 @@ int notify_change(struct dentry * dentry, struct iattr * 
attr, struct inode **de
if (IS_IMMUTABLE(inode))
return -EPERM;
 
+   if (IS_SWAPFILE(inode))
+   return -ETXTBSY;
+
if ((ia_valid & (ATTR_MODE | ATTR_UID | ATTR_GID | ATTR_TIMES_SET)) &&
IS_APPEND(inode))
return -EPERM;
diff --git a/mm/filemap.c b/mm/filemap.c
index dad85e10f5f8..fd80bc20e30a 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -2938,6 +2938,9 @@ inline ssize_t generic_write_checks(struct kiocb *iocb, 
struct iov_iter *from)
if (IS_IMMUTABLE(inode))
return -EPERM;
 
+   if (IS_SWAPFILE(inode))
+   return -ETXTBSY;
+
if (!iov_iter_count(from))
return 0;
 
diff --git a/mm/memory.c b/mm/memory.c
index 4311cfdade90..c04c6a689995 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -2235,7 +2235,9 @@ static vm_fault_t do_page_mkwrite(struct vm_fault *vmf)
 
vmf->flags = FAULT_FLAG_WRITE|FAULT_FLAG_MKWRITE;
 
-   if (vmf->vma->vm_file && IS_IMMUTABLE(file_inode(vmf->vma->vm_file)))
+   if (vmf->vma->vm_file &&
+   (IS_IMMUTABLE(file_inode(vmf->vma->vm_file)) ||
+IS_SWAPFILE(file_inode(vmf->vma->vm_file
return VM_FAULT_SIGBUS;
 
ret = vmf->vma->vm_ops->page_mkwrite(vmf);
diff --git a/mm/mmap.c b/mm/mmap.c
index ac1e32205237..031807339869 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -1488,6 +1488,8 @@ unsigned long do_mmap(struct file *file, unsigned long 
addr,
return -EACCES;
if (IS_IMMUTABLE(file_inode(file)))
return -EPERM;
+   if (IS_SWAPFILE(file_inode(file)))
+   return -ETXTBSY;
}
 
/*
diff --git a/mm/swapfile.c b/mm/swapfile.c
index 596ac98051c5..390859785558 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -3165,6 +3165,19 @@ SYSCALL_DEFINE2(swapon, const char __user *, 
specialfile, int, swap_flags)
if (error)
goto bad_swap;
 
+   /*
+* Flush any pending IO and dirty mappings before we start using this
+* swap file.
+*/
+   if (S_ISREG(inode->i_mode)) {
+   inode->i_flags |= S_SWAPFILE;
+   error = inode_flush_data(inode);
+   if (error) {
+   inode->i_flags &= ~S_SWAPFILE;
+   goto bad_swap;
+   }
+   }
+
mutex_lock(_mutex);
prio = -1;
if (swap_flags & SWAP_FLAG_PREFER)
@@ -3185,8 +3198,6 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, 
int, swap_flags)
atomic_inc(_poll_event);
wake_up_interruptible(_poll_wait);
 
-   if (S_ISREG(inode->i_mode))
-   inode->i_flags |= S_SWAPFILE;
error = 0;
goto out;
 bad_swap:



___
Linux-f2fs-devel mailing list
Linux-f2fs-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel


[f2fs-dev] [PATCH 1/7] mm/fs: don't allow writes to immutable files

2019-06-21 Thread Darrick J. Wong
From: Darrick J. Wong 

The chattr manpage has this to say about immutable files:

"A file with the 'i' attribute cannot be modified: it cannot be deleted
or renamed, no link can be created to this file, most of the file's
metadata can not be modified, and the file can not be opened in write
mode."

Once the flag is set, it is enforced for quite a few file operations,
such as fallocate, fpunch, fzero, rm, touch, open, etc.  However, we
don't check for immutability when doing a write(), a PROT_WRITE mmap(),
a truncate(), or a write to a previously established mmap.

If a program has an open write fd to a file that the administrator
subsequently marks immutable, the program still can change the file
contents.  Weird!

The ability to write to an immutable file does not follow the manpage
promise that immutable files cannot be modified.  Worse yet it's
inconsistent with the behavior of other syscalls which don't allow
modifications of immutable files.

Therefore, add the necessary checks to make the write, mmap, and
truncate behavior consistent with what the manpage says and consistent
with other syscalls on filesystems which support IMMUTABLE.

Signed-off-by: Darrick J. Wong 
---
 fs/attr.c|   13 ++---
 mm/filemap.c |3 +++
 mm/memory.c  |3 +++
 mm/mmap.c|8 ++--
 4 files changed, 18 insertions(+), 9 deletions(-)


diff --git a/fs/attr.c b/fs/attr.c
index d22e8187477f..1fcfdcc5b367 100644
--- a/fs/attr.c
+++ b/fs/attr.c
@@ -233,19 +233,18 @@ int notify_change(struct dentry * dentry, struct iattr * 
attr, struct inode **de
 
WARN_ON_ONCE(!inode_is_locked(inode));
 
-   if (ia_valid & (ATTR_MODE | ATTR_UID | ATTR_GID | ATTR_TIMES_SET)) {
-   if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
-   return -EPERM;
-   }
+   if (IS_IMMUTABLE(inode))
+   return -EPERM;
+
+   if ((ia_valid & (ATTR_MODE | ATTR_UID | ATTR_GID | ATTR_TIMES_SET)) &&
+   IS_APPEND(inode))
+   return -EPERM;
 
/*
 * If utimes(2) and friends are called with times == NULL (or both
 * times are UTIME_NOW), then we need to check for write permission
 */
if (ia_valid & ATTR_TOUCH) {
-   if (IS_IMMUTABLE(inode))
-   return -EPERM;
-
if (!inode_owner_or_capable(inode)) {
error = inode_permission(inode, MAY_WRITE);
if (error)
diff --git a/mm/filemap.c b/mm/filemap.c
index aac71aef4c61..dad85e10f5f8 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -2935,6 +2935,9 @@ inline ssize_t generic_write_checks(struct kiocb *iocb, 
struct iov_iter *from)
loff_t count;
int ret;
 
+   if (IS_IMMUTABLE(inode))
+   return -EPERM;
+
if (!iov_iter_count(from))
return 0;
 
diff --git a/mm/memory.c b/mm/memory.c
index ddf20bd0c317..4311cfdade90 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -2235,6 +2235,9 @@ static vm_fault_t do_page_mkwrite(struct vm_fault *vmf)
 
vmf->flags = FAULT_FLAG_WRITE|FAULT_FLAG_MKWRITE;
 
+   if (vmf->vma->vm_file && IS_IMMUTABLE(file_inode(vmf->vma->vm_file)))
+   return VM_FAULT_SIGBUS;
+
ret = vmf->vma->vm_ops->page_mkwrite(vmf);
/* Restore original flags so that caller is not surprised */
vmf->flags = old_flags;
diff --git a/mm/mmap.c b/mm/mmap.c
index 7e8c3e8ae75f..ac1e32205237 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -1483,8 +1483,12 @@ unsigned long do_mmap(struct file *file, unsigned long 
addr,
case MAP_SHARED_VALIDATE:
if (flags & ~flags_mask)
return -EOPNOTSUPP;
-   if ((prot_WRITE) && !(file->f_mode_WRITE))
-   return -EACCES;
+   if (prot & PROT_WRITE) {
+   if (!(file->f_mode & FMODE_WRITE))
+   return -EACCES;
+   if (IS_IMMUTABLE(file_inode(file)))
+   return -EPERM;
+   }
 
/*
 * Make sure we don't allow writing to an append-only



___
Linux-f2fs-devel mailing list
Linux-f2fs-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel


[f2fs-dev] [PATCH 4/7] vfs: don't allow most setxattr to immutable files

2019-06-21 Thread Darrick J. Wong
From: Darrick J. Wong 

The chattr manpage has this to say about immutable files:

"A file with the 'i' attribute cannot be modified: it cannot be deleted
or renamed, no link can be created to this file, most of the file's
metadata can not be modified, and the file can not be opened in write
mode."

However, we don't actually check the immutable flag in the setattr code,
which means that we can update inode flags and project ids and extent
size hints on supposedly immutable files.  Therefore, reject setflags
and fssetxattr calls on an immutable file if the file is immutable and
will remain that way.

Signed-off-by: Darrick J. Wong 
---
 fs/inode.c |   27 +++
 1 file changed, 27 insertions(+)


diff --git a/fs/inode.c b/fs/inode.c
index 6374ad2ef25b..220caefc31f7 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -2204,6 +2204,14 @@ int vfs_ioc_setflags_check(struct inode *inode, int 
oldflags, int flags)
!capable(CAP_LINUX_IMMUTABLE))
return -EPERM;
 
+   /*
+* We aren't allowed to change any other flags if the immutable flag is
+* already set and is not being unset.
+*/
+   if ((oldflags & FS_IMMUTABLE_FL) && (flags & FS_IMMUTABLE_FL) &&
+   oldflags != flags)
+   return -EPERM;
+
return 0;
 }
 EXPORT_SYMBOL(vfs_ioc_setflags_check);
@@ -2246,6 +2254,25 @@ int vfs_ioc_fssetxattr_check(struct inode *inode, const 
struct fsxattr *old_fa,
!S_ISREG(inode->i_mode) && !S_ISDIR(inode->i_mode))
return -EINVAL;
 
+   /*
+* We aren't allowed to change any fields if the immutable flag is
+* already set and is not being unset.
+*/
+   if ((old_fa->fsx_xflags & FS_XFLAG_IMMUTABLE) &&
+   (fa->fsx_xflags & FS_XFLAG_IMMUTABLE)) {
+   if (old_fa->fsx_xflags != fa->fsx_xflags)
+   return -EPERM;
+   if (old_fa->fsx_projid != fa->fsx_projid)
+   return -EPERM;
+   if ((fa->fsx_xflags & (FS_XFLAG_EXTSIZE |
+  FS_XFLAG_EXTSZINHERIT)) &&
+   old_fa->fsx_extsize != fa->fsx_extsize)
+   return -EPERM;
+   if ((old_fa->fsx_xflags & FS_XFLAG_COWEXTSIZE) &&
+   old_fa->fsx_cowextsize != fa->fsx_cowextsize)
+   return -EPERM;
+   }
+
/* Extent size hints of zero turn off the flags. */
if (fa->fsx_extsize == 0)
fa->fsx_xflags &= ~(FS_XFLAG_EXTSIZE | FS_XFLAG_EXTSZINHERIT);



___
Linux-f2fs-devel mailing list
Linux-f2fs-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel


[f2fs-dev] [PATCH 6/7] xfs: clean up xfs_merge_ioc_xflags

2019-06-21 Thread Darrick J. Wong
From: Darrick J. Wong 

Clean up the calling convention since we're editing the fsxattr struct
anyway.

Signed-off-by: Darrick J. Wong 
---
 fs/xfs/xfs_ioctl.c |   32 ++--
 1 file changed, 14 insertions(+), 18 deletions(-)


diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c
index 7b19ba2956ad..a67bc9afdd0b 100644
--- a/fs/xfs/xfs_ioctl.c
+++ b/fs/xfs/xfs_ioctl.c
@@ -829,35 +829,31 @@ xfs_ioc_ag_geometry(
  * Linux extended inode flags interface.
  */
 
-STATIC unsigned int
+static inline void
 xfs_merge_ioc_xflags(
-   unsigned intflags,
-   unsigned intstart)
+   struct fsxattr  *fa,
+   unsigned intflags)
 {
-   unsigned intxflags = start;
-
if (flags & FS_IMMUTABLE_FL)
-   xflags |= FS_XFLAG_IMMUTABLE;
+   fa->fsx_xflags |= FS_XFLAG_IMMUTABLE;
else
-   xflags &= ~FS_XFLAG_IMMUTABLE;
+   fa->fsx_xflags &= ~FS_XFLAG_IMMUTABLE;
if (flags & FS_APPEND_FL)
-   xflags |= FS_XFLAG_APPEND;
+   fa->fsx_xflags |= FS_XFLAG_APPEND;
else
-   xflags &= ~FS_XFLAG_APPEND;
+   fa->fsx_xflags &= ~FS_XFLAG_APPEND;
if (flags & FS_SYNC_FL)
-   xflags |= FS_XFLAG_SYNC;
+   fa->fsx_xflags |= FS_XFLAG_SYNC;
else
-   xflags &= ~FS_XFLAG_SYNC;
+   fa->fsx_xflags &= ~FS_XFLAG_SYNC;
if (flags & FS_NOATIME_FL)
-   xflags |= FS_XFLAG_NOATIME;
+   fa->fsx_xflags |= FS_XFLAG_NOATIME;
else
-   xflags &= ~FS_XFLAG_NOATIME;
+   fa->fsx_xflags &= ~FS_XFLAG_NOATIME;
if (flags & FS_NODUMP_FL)
-   xflags |= FS_XFLAG_NODUMP;
+   fa->fsx_xflags |= FS_XFLAG_NODUMP;
else
-   xflags &= ~FS_XFLAG_NODUMP;
-
-   return xflags;
+   fa->fsx_xflags &= ~FS_XFLAG_NODUMP;
 }
 
 STATIC unsigned int
@@ -1504,7 +1500,7 @@ xfs_ioc_setxflags(
return -EOPNOTSUPP;
 
__xfs_ioc_fsgetxattr(ip, false, );
-   fa.fsx_xflags = xfs_merge_ioc_xflags(flags, fa.fsx_xflags);
+   xfs_merge_ioc_xflags(, flags);
 
error = mnt_want_write_file(filp);
if (error)



___
Linux-f2fs-devel mailing list
Linux-f2fs-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel


[f2fs-dev] [PATCH v4 0/7] vfs: make immutable files actually immutable

2019-06-21 Thread Darrick J. Wong
Hi all,

The chattr(1) manpage has this to say about the immutable bit that
system administrators can set on files:

"A file with the 'i' attribute cannot be modified: it cannot be deleted
or renamed, no link can be created to this file, most of the file's
metadata can not be modified, and the file can not be opened in write
mode."

Given the clause about how the file 'cannot be modified', it is
surprising that programs holding writable file descriptors can continue
to write to and truncate files after the immutable flag has been set,
but they cannot call other things such as utimes, fallocate, unlink,
link, setxattr, or reflink.

Since the immutable flag is only settable by administrators, resolve
this inconsistent behavior in favor of the documented behavior -- once
the flag is set, the file cannot be modified, period.  We presume that
administrators must be trusted to know what they're doing, and that
cutting off programs with writable fds will probably break them.

Therefore, add immutability checks to the relevant VFS functions, then
refactor the SETFLAGS and FSSETXATTR implementations to use common
argument checking functions so that we can then force pagefaults on all
the file data when setting immutability.

Note that various distro manpages points out the inconsistent behavior
of the various Linux filesystems w.r.t. immutable.  This fixes all that.

I also discovered that userspace programs can write and create writable
memory mappings to active swap files.  This is extremely bad because
this allows anyone with write privileges to corrupt system memory.  The
final patch in this series closes off that hole, at least for swap
files.

If you're going to start using this mess, you probably ought to just
pull from my git trees, which are linked below.

This has been lightly tested with fstests.  Enjoy!
Comments and questions are, as always, welcome.

--D

kernel git tree:
https://git.kernel.org/cgit/linux/kernel/git/djwong/xfs-linux.git/log/?h=immutable-files

fstests git tree:
https://git.kernel.org/cgit/linux/kernel/git/djwong/xfstests-dev.git/log/?h=immutable-files


___
Linux-f2fs-devel mailing list
Linux-f2fs-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel


[f2fs-dev] [PATCH 3/4] fs: teach vfs_ioc_fssetxattr_check to check project id info

2019-06-21 Thread Darrick J. Wong
From: Darrick J. Wong 

Standardize the project id checks for FSSETXATTR.

Signed-off-by: Darrick J. Wong 
Reviewed-by: Jan Kara 
---
 fs/ext4/ioctl.c|   27 ---
 fs/f2fs/file.c |   27 ---
 fs/inode.c |   13 +
 fs/xfs/xfs_ioctl.c |   15 ---
 4 files changed, 13 insertions(+), 69 deletions(-)


diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c
index c2f48c90ca45..6aa1df1918f7 100644
--- a/fs/ext4/ioctl.c
+++ b/fs/ext4/ioctl.c
@@ -697,30 +697,6 @@ static long ext4_ioctl_group_add(struct file *file,
return err;
 }
 
-static int ext4_ioctl_check_project(struct inode *inode, struct fsxattr *fa)
-{
-   /*
-* Project Quota ID state is only allowed to change from within the init
-* namespace. Enforce that restriction only if we are trying to change
-* the quota ID state. Everything else is allowed in user namespaces.
-*/
-   if (current_user_ns() == _user_ns)
-   return 0;
-
-   if (__kprojid_val(EXT4_I(inode)->i_projid) != fa->fsx_projid)
-   return -EINVAL;
-
-   if (ext4_test_inode_flag(inode, EXT4_INODE_PROJINHERIT)) {
-   if (!(fa->fsx_xflags & FS_XFLAG_PROJINHERIT))
-   return -EINVAL;
-   } else {
-   if (fa->fsx_xflags & FS_XFLAG_PROJINHERIT)
-   return -EINVAL;
-   }
-
-   return 0;
-}
-
 static void ext4_fsgetxattr(struct inode *inode, struct fsxattr *fa)
 {
struct ext4_inode_info *ei = EXT4_I(inode);
@@ -1135,9 +,6 @@ long ext4_ioctl(struct file *filp, unsigned int cmd, 
unsigned long arg)
 
inode_lock(inode);
ext4_fsgetxattr(inode, _fa);
-   err = ext4_ioctl_check_project(inode, );
-   if (err)
-   goto out;
err = vfs_ioc_fssetxattr_check(inode, _fa, );
if (err)
goto out;
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index f707de6bd4a8..183ed1ac60e1 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -2799,30 +2799,6 @@ static int f2fs_ioc_fsgetxattr(struct file *filp, 
unsigned long arg)
return 0;
 }
 
-static int f2fs_ioctl_check_project(struct inode *inode, struct fsxattr *fa)
-{
-   /*
-* Project Quota ID state is only allowed to change from within the init
-* namespace. Enforce that restriction only if we are trying to change
-* the quota ID state. Everything else is allowed in user namespaces.
-*/
-   if (current_user_ns() == _user_ns)
-   return 0;
-
-   if (__kprojid_val(F2FS_I(inode)->i_projid) != fa->fsx_projid)
-   return -EINVAL;
-
-   if (F2FS_I(inode)->i_flags & F2FS_PROJINHERIT_FL) {
-   if (!(fa->fsx_xflags & FS_XFLAG_PROJINHERIT))
-   return -EINVAL;
-   } else {
-   if (fa->fsx_xflags & FS_XFLAG_PROJINHERIT)
-   return -EINVAL;
-   }
-
-   return 0;
-}
-
 static int f2fs_ioc_fssetxattr(struct file *filp, unsigned long arg)
 {
struct inode *inode = file_inode(filp);
@@ -2850,9 +2826,6 @@ static int f2fs_ioc_fssetxattr(struct file *filp, 
unsigned long arg)
return err;
 
inode_lock(inode);
-   err = f2fs_ioctl_check_project(inode, );
-   if (err)
-   goto out;
 
__f2fs_ioc_fsgetxattr(inode, _fa);
err = vfs_ioc_fssetxattr_check(inode, _fa, );
diff --git a/fs/inode.c b/fs/inode.c
index ddfe60679b53..f1ffa9605078 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -2221,6 +2221,19 @@ int vfs_ioc_fssetxattr_check(struct inode *inode, const 
struct fsxattr *old_fa,
!capable(CAP_LINUX_IMMUTABLE))
return -EPERM;
 
+   /*
+* Project Quota ID state is only allowed to change from within the init
+* namespace. Enforce that restriction only if we are trying to change
+* the quota ID state. Everything else is allowed in user namespaces.
+*/
+   if (current_user_ns() != _user_ns) {
+   if (old_fa->fsx_projid != fa->fsx_projid)
+   return -EINVAL;
+   if ((old_fa->fsx_xflags ^ fa->fsx_xflags) &
+   FS_XFLAG_PROJINHERIT)
+   return -EINVAL;
+   }
+
return 0;
 }
 EXPORT_SYMBOL(vfs_ioc_fssetxattr_check);
diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c
index 08c24f2f55c3..82961de98900 100644
--- a/fs/xfs/xfs_ioctl.c
+++ b/fs/xfs/xfs_ioctl.c
@@ -1299,21 +1299,6 @@ xfs_ioctl_setattr_check_projid(
if (fa->fsx_projid > (uint16_t)-1 &&
!xfs_sb_version_hasprojid32bit(>i_mount->m_sb))
return -EINVAL;
-
-   /*
-* Project Quota ID state is only allowed to change from within the init
-* namespace. Enforce that restriction only if we are trying to change
-* the quota ID state. Everything 

[f2fs-dev] [PATCH 2/4] vfs: create a generic checking function for FS_IOC_FSSETXATTR

2019-06-21 Thread Darrick J. Wong
From: Darrick J. Wong 

Create a generic checking function for the incoming FS_IOC_FSSETXATTR
fsxattr values so that we can standardize some of the implementation
behaviors.

Signed-off-by: Darrick J. Wong 
Reviewed-by: Jan Kara 
---
 fs/btrfs/ioctl.c   |   21 +---
 fs/ext4/ioctl.c|   27 ++--
 fs/f2fs/file.c |   26 ++-
 fs/inode.c |   17 +
 fs/xfs/xfs_ioctl.c |   70 ++--
 include/linux/fs.h |3 ++
 6 files changed, 111 insertions(+), 53 deletions(-)


diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index f408aa93b0cf..7ddda5b4b6a6 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -366,6 +366,13 @@ static int check_xflags(unsigned int flags)
return 0;
 }
 
+static void __btrfs_ioctl_fsgetxattr(struct btrfs_inode *binode,
+struct fsxattr *fa)
+{
+   memset(fa, 0, sizeof(*fa));
+   fa->fsx_xflags = btrfs_inode_flags_to_xflags(binode->flags);
+}
+
 /*
  * Set the xflags from the internal inode flags. The remaining items of fsxattr
  * are zeroed.
@@ -375,8 +382,7 @@ static int btrfs_ioctl_fsgetxattr(struct file *file, void 
__user *arg)
struct btrfs_inode *binode = BTRFS_I(file_inode(file));
struct fsxattr fa;
 
-   memset(, 0, sizeof(fa));
-   fa.fsx_xflags = btrfs_inode_flags_to_xflags(binode->flags);
+   __btrfs_ioctl_fsgetxattr(binode, );
 
if (copy_to_user(arg, , sizeof(fa)))
return -EFAULT;
@@ -390,7 +396,7 @@ static int btrfs_ioctl_fssetxattr(struct file *file, void 
__user *arg)
struct btrfs_inode *binode = BTRFS_I(inode);
struct btrfs_root *root = binode->root;
struct btrfs_trans_handle *trans;
-   struct fsxattr fa;
+   struct fsxattr fa, old_fa;
unsigned old_flags;
unsigned old_i_flags;
int ret = 0;
@@ -421,13 +427,10 @@ static int btrfs_ioctl_fssetxattr(struct file *file, void 
__user *arg)
old_flags = binode->flags;
old_i_flags = inode->i_flags;
 
-   /* We need the capabilities to change append-only or immutable inode */
-   if (((old_flags & (BTRFS_INODE_APPEND | BTRFS_INODE_IMMUTABLE)) ||
-(fa.fsx_xflags & (FS_XFLAG_APPEND | FS_XFLAG_IMMUTABLE))) &&
-   !capable(CAP_LINUX_IMMUTABLE)) {
-   ret = -EPERM;
+   __btrfs_ioctl_fsgetxattr(binode, _fa);
+   ret = vfs_ioc_fssetxattr_check(inode, _fa, );
+   if (ret)
goto out_unlock;
-   }
 
if (fa.fsx_xflags & FS_XFLAG_SYNC)
binode->flags |= BTRFS_INODE_SYNC;
diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c
index 5126ee351a84..c2f48c90ca45 100644
--- a/fs/ext4/ioctl.c
+++ b/fs/ext4/ioctl.c
@@ -721,6 +721,19 @@ static int ext4_ioctl_check_project(struct inode *inode, 
struct fsxattr *fa)
return 0;
 }
 
+static void ext4_fsgetxattr(struct inode *inode, struct fsxattr *fa)
+{
+   struct ext4_inode_info *ei = EXT4_I(inode);
+
+   memset(fa, 0, sizeof(struct fsxattr));
+   fa->fsx_xflags = ext4_iflags_to_xflags(ei->i_flags & 
EXT4_FL_USER_VISIBLE);
+
+   if (ext4_has_feature_project(inode->i_sb)) {
+   fa->fsx_projid = (__u32)from_kprojid(_user_ns,
+   ei->i_projid);
+   }
+}
+
 long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
 {
struct inode *inode = file_inode(filp);
@@ -1089,13 +1102,7 @@ long ext4_ioctl(struct file *filp, unsigned int cmd, 
unsigned long arg)
{
struct fsxattr fa;
 
-   memset(, 0, sizeof(struct fsxattr));
-   fa.fsx_xflags = ext4_iflags_to_xflags(ei->i_flags & 
EXT4_FL_USER_VISIBLE);
-
-   if (ext4_has_feature_project(inode->i_sb)) {
-   fa.fsx_projid = (__u32)from_kprojid(_user_ns,
-   EXT4_I(inode)->i_projid);
-   }
+   ext4_fsgetxattr(inode, );
 
if (copy_to_user((struct fsxattr __user *)arg,
 , sizeof(fa)))
@@ -1104,7 +,7 @@ long ext4_ioctl(struct file *filp, unsigned int cmd, 
unsigned long arg)
}
case EXT4_IOC_FSSETXATTR:
{
-   struct fsxattr fa;
+   struct fsxattr fa, old_fa;
int err;
 
if (copy_from_user(, (struct fsxattr __user *)arg,
@@ -1127,7 +1134,11 @@ long ext4_ioctl(struct file *filp, unsigned int cmd, 
unsigned long arg)
return err;
 
inode_lock(inode);
+   ext4_fsgetxattr(inode, _fa);
err = ext4_ioctl_check_project(inode, );
+   if (err)
+   goto out;
+   err = vfs_ioc_fssetxattr_check(inode, _fa, );
if (err)
goto out;
flags = (ei->i_flags & ~EXT4_FL_XFLAG_VISIBLE) |
diff --git a/fs/f2fs/file.c 

[f2fs-dev] [PATCH v2 0/4] vfs: clean up SETFLAGS and FSSETXATTR option processing

2019-06-21 Thread Darrick J. Wong
Hi all,

The FS_IOC_SETFLAGS and FS_IOC_FSSETXATTR ioctls were promoted from ext4
and XFS, respectively, into the VFS.  However, we didn't promote any of
the parameter checking code from those filesystems, which lead to a mess
where each filesystem open-codes whatever parameter checks they want and
the behavior across filesystems is no longer consistent.

Therefore, create some generic checking functions in the VFS and remove
all the open-coded pieces in each filesystem.  This preserves the
current behavior where a filesystem can choose to ignore fields it
doesn't understand.

If you're going to start using this mess, you probably ought to just
pull from my git trees, which are linked below.

This has been lightly tested with fstests.  Enjoy!
Comments and questions are, as always, welcome.

--D

kernel git tree:
https://git.kernel.org/cgit/linux/kernel/git/djwong/xfs-linux.git/log/?h=file-ioctl-cleanups


___
Linux-f2fs-devel mailing list
Linux-f2fs-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel


Re: [f2fs-dev] [PATCH V3 0/7] Consolidate FS read I/O callbacks code

2019-06-21 Thread Eric Biggers
On Sun, Jun 16, 2019 at 09:38:06PM +0530, Chandan Rajendra wrote:
> This patchset moves the "FS read I/O callbacks" code into a file of its
> own (i.e. fs/read_callbacks.c) and modifies the generic
> do_mpage_readpge() to make use of the functionality provided.
> 
> "FS read I/O callbacks" code implements the state machine that needs
> to be executed after reading data from files that are encrypted and/or
> have verity metadata associated with them.
> 
> With these changes in place, the patchset changes Ext4 to use
> mpage_readpage[s] instead of its own custom ext4_readpage[s]()
> functions. This is done to reduce duplication of code across
> filesystems. Also, "FS read I/O callbacks" source files will be built
> only if CONFIG_FS_ENCRYPTION is enabled.
> 
> The patchset also modifies fs/buffer.c to get file
> encryption/decryption to work with subpage-sized blocks.
> 
> The patches can also be obtained from
> https://github.com/chandanr/linux.git at branch subpage-encryption-v3.
> 

FWIW: while doing my review I put together an (untested) incremental patch that
addresses my comments on the code, so I've provided it below in case you want to
start with it when addressing my comments.

This is just a single diff against your subpage-encryption-v3 branch, so of
course it would still need to be folded into the appropriate patches.  Also see
my suggestions in reply to patch 2 about how to better organize the series.  I
also left TODOs in kerneldoc comments that still need to be updated.

diff --git a/fs/Kconfig b/fs/Kconfig
index d869859c88da18..d95897a0f3d052 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -22,7 +22,7 @@ config FS_IOMAP
bool
 
 config FS_READ_CALLBACKS
-   bool
+   bool
 
 source "fs/ext2/Kconfig"
 source "fs/ext4/Kconfig"
diff --git a/fs/Makefile b/fs/Makefile
index a1a06f0db5c181..81854d8161dea7 100644
--- a/fs/Makefile
+++ b/fs/Makefile
@@ -21,8 +21,6 @@ else
 obj-y +=   no-block.o
 endif
 
-obj-$(CONFIG_FS_READ_CALLBACKS) += read_callbacks.o
-
 obj-$(CONFIG_PROC_FS) += proc_namespace.o
 
 obj-y  += notify/
@@ -55,6 +53,7 @@ obj-$(CONFIG_SYSCTL)  += drop_caches.o
 
 obj-$(CONFIG_FHANDLE)  += fhandle.o
 obj-$(CONFIG_FS_IOMAP) += iomap.o
+obj-$(CONFIG_FS_READ_CALLBACKS)+= read_callbacks.o
 
 obj-y  += quota/
 
diff --git a/fs/buffer.c b/fs/buffer.c
index dcb67525dac91d..bfda6af7ea1042 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -247,7 +247,7 @@ __find_get_block_slow(struct block_device *bdev, sector_t 
block)
return ret;
 }
 
-void end_buffer_async_read(struct buffer_head *bh)
+void end_buffer_async_read(struct buffer_head *bh, int uptodate)
 {
unsigned long flags;
struct buffer_head *first;
@@ -255,7 +255,17 @@ void end_buffer_async_read(struct buffer_head *bh)
struct page *page;
int page_uptodate = 1;
 
+   BUG_ON(!buffer_async_read(bh));
+
page = bh->b_page;
+   if (uptodate) {
+   set_buffer_uptodate(bh);
+   } else {
+   clear_buffer_uptodate(bh);
+   buffer_io_error(bh, ", async page read");
+   SetPageError(page);
+   }
+
/*
 * Be _very_ careful from here on. Bad things can happen if
 * two buffer heads end IO at almost the same time and both
@@ -298,25 +308,9 @@ void end_buffer_async_read(struct buffer_head *bh)
  * I/O completion handler for block_read_full_page().  Pages are unlocked
  * after the I/O completes and the read callbacks (if any) have executed.
  */
-static void __end_buffer_async_read(struct buffer_head *bh, int uptodate)
+static void end_buffer_async_read_cbs(struct buffer_head *bh, int uptodate)
 {
-   struct page *page;
-
-   BUG_ON(!buffer_async_read(bh));
-
-   if (read_callbacks_end_bh(bh, uptodate))
-   return;
-
-   page = bh->b_page;
-   if (uptodate) {
-   set_buffer_uptodate(bh);
-   } else {
-   clear_buffer_uptodate(bh);
-   buffer_io_error(bh, ", async page read");
-   SetPageError(page);
-   }
-
-   end_buffer_async_read(bh);
+   read_callbacks_endio_bh(bh, uptodate);
 }
 
 /*
@@ -391,7 +385,7 @@ EXPORT_SYMBOL(end_buffer_async_write);
  */
 static void mark_buffer_async_read(struct buffer_head *bh)
 {
-   bh->b_end_io = __end_buffer_async_read;
+   bh->b_end_io = end_buffer_async_read_cbs;
set_buffer_async_read(bh);
 }
 
@@ -2307,10 +2301,10 @@ int block_read_full_page(struct page *page, get_block_t 
*get_block)
for (i = 0; i < nr; i++) {
bh = arr[i];
if (buffer_uptodate(bh)) {
-   __end_buffer_async_read(bh, 1);
+   end_buffer_async_read(bh, 1);
} else {
-   if (WARN_ON(read_callbacks_setup(inode, NULL, bh, 
NULL))) {
-   __end_buffer_async_read(bh, 0);
+ 

Re: [f2fs-dev] [PATCH V3 3/7] fscrypt: remove struct fscrypt_ctx

2019-06-21 Thread Eric Biggers
On Sun, Jun 16, 2019 at 09:38:09PM +0530, Chandan Rajendra wrote:
> -/**
> - * fscrypt_get_ctx() - Get a decryption context
> - * @gfp_flags:   The gfp flag for memory allocation
> - *
> - * Allocate and initialize a decryption context.
> - *
> - * Return: A new decryption context on success; an ERR_PTR() otherwise.
> - */
> -struct fscrypt_ctx *fscrypt_get_ctx(gfp_t gfp_flags)
> -{
> - struct fscrypt_ctx *ctx;
> - unsigned long flags;
> -
> - /*
> -  * First try getting a ctx from the free list so that we don't have to
> -  * call into the slab allocator.
> -  */
> - spin_lock_irqsave(_ctx_lock, flags);
> - ctx = list_first_entry_or_null(_free_ctxs,
> - struct fscrypt_ctx, free_list);
> - if (ctx)
> - list_del(>free_list);
> - spin_unlock_irqrestore(_ctx_lock, flags);
> - if (!ctx) {
> - ctx = kmem_cache_zalloc(fscrypt_ctx_cachep, gfp_flags);
> - if (!ctx)
> - return ERR_PTR(-ENOMEM);
> - ctx->flags |= FS_CTX_REQUIRES_FREE_ENCRYPT_FL;
> - } else {
> - ctx->flags &= ~FS_CTX_REQUIRES_FREE_ENCRYPT_FL;
> - }
> - return ctx;
> -}
> -EXPORT_SYMBOL(fscrypt_get_ctx);

FS_CTX_REQUIRES_FREE_ENCRYPT_FL is no longer used, so should be removed.

- Eric


___
Linux-f2fs-devel mailing list
Linux-f2fs-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel


Re: [f2fs-dev] [PATCH V3 6/7] Add decryption support for sub-pagesized blocks

2019-06-21 Thread Eric Biggers
On Sun, Jun 16, 2019 at 09:38:12PM +0530, Chandan Rajendra wrote:
> To support decryption of sub-pagesized blocks this commit adds code to,
> 1. Track buffer head in "struct read_callbacks_ctx".
> 2. Pass buffer head argument to all read callbacks.
> 3. Add new fscrypt helper to decrypt the file data referred to by a
>buffer head.
> 
> Signed-off-by: Chandan Rajendra 
> ---
>  fs/buffer.c|  55 +--
>  fs/crypto/bio.c|  21 +-
>  fs/f2fs/data.c |   2 +-
>  fs/mpage.c |   2 +-
>  fs/read_callbacks.c| 118 +
>  include/linux/buffer_head.h|   1 +
>  include/linux/read_callbacks.h |  13 +++-
>  7 files changed, 158 insertions(+), 54 deletions(-)
> 

This is another patch that unnecessarily changes way too many components at
once.  My suggestions elsewhere would resolve this, though:

- This patch changes fs/f2fs/data.c and fs/mpage.c only to pass a NULL
  buffer_head to read_callbacks_setup().  But as per my comments on patch 1,
  read_callbacks_setup() should be split into read_callbacks_setup_bio() and
  read_callbacks_end_bh().

- This patch changes fs/crypto/ only to add support for the buffer_head
  decryption work.  But as per my comments on patch 1, that should be in
  read_callbacks.c instead.

And adding buffer_head support to fs/read_callbacks.c should be its own patch,
*or* should simply be folded into the patch that adds fs/read_callbacks.c.

Then the only thing remaining in this patch would be updating fs/buffer.c to
make it use the read_callbacks, which should be retitled to something like
"fs/buffer.c: add decryption support via read_callbacks".

- Eric


___
Linux-f2fs-devel mailing list
Linux-f2fs-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel


Re: [f2fs-dev] [PATCH V3 4/7] fs/mpage.c: Integrate read callbacks

2019-06-21 Thread Eric Biggers
On Sun, Jun 16, 2019 at 09:38:10PM +0530, Chandan Rajendra wrote:
> This commit adds code to make do_mpage_readpage() to be "read callbacks"
> aware i.e. for files requiring decryption, do_mpage_readpage() now
> sets up the read callbacks state machine when allocating a bio and later
> starts execution of the state machine after file data is read from the
> underlying disk.
> 
> Signed-off-by: Chandan Rajendra 
> ---
>  fs/mpage.c | 11 ++-
>  1 file changed, 10 insertions(+), 1 deletion(-)
> 
> diff --git a/fs/mpage.c b/fs/mpage.c
> index 436a85260394..611ad122fc92 100644
> --- a/fs/mpage.c
> +++ b/fs/mpage.c
> @@ -30,6 +30,7 @@
>  #include 
>  #include 
>  #include 
> +#include 
>  #include "internal.h"
>  
>  /*
> @@ -49,6 +50,8 @@ static void mpage_end_io(struct bio *bio)
>   struct bio_vec *bv;
>   struct bvec_iter_all iter_all;
>  
> + if (read_callbacks_end_bio(bio))
> + return;
>   bio_for_each_segment_all(bv, bio, iter_all) {
>   struct page *page = bv->bv_page;
>   page_endio(page, bio_op(bio),
> @@ -309,6 +312,12 @@ static struct bio *do_mpage_readpage(struct 
> mpage_readpage_args *args)
>   gfp);
>   if (args->bio == NULL)
>   goto confused;
> +
> + if (read_callbacks_setup(inode, args->bio, NULL)) {
> + bio_put(args->bio);
> + args->bio = NULL;
> + goto confused;
> + }
>   }
>  
>   length = first_hole << blkbits;
> @@ -330,7 +339,7 @@ static struct bio *do_mpage_readpage(struct 
> mpage_readpage_args *args)
>  confused:
>   if (args->bio)
>   args->bio = mpage_bio_submit(REQ_OP_READ, op_flags, args->bio);
> - if (!PageUptodate(page))
> + if (!PageUptodate(page) && !PageError(page))
>   block_read_full_page(page, args->get_block);
>   else
>   unlock_page(page);
> -- 
> 2.19.1

Why is the !PageError() check needed here?

- Eric


___
Linux-f2fs-devel mailing list
Linux-f2fs-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel


Re: [f2fs-dev] [PATCH V3 2/7] Integrate read callbacks into Ext4 and F2FS

2019-06-21 Thread Eric Biggers
Hi Chandan,

On Sun, Jun 16, 2019 at 09:38:08PM +0530, Chandan Rajendra wrote:
> This commit gets Ext4 and F2FS to make use of read callbacks API to
> perform decryption of file data read from the disk.
> ---
>  fs/crypto/bio.c |  30 +
>  fs/crypto/crypto.c  |   1 +
>  fs/crypto/fscrypt_private.h |   3 +
>  fs/ext4/readpage.c  |  29 +++--
>  fs/f2fs/data.c  | 124 +++-
>  fs/f2fs/super.c |   9 +--
>  fs/read_callbacks.c |   1 -
>  include/linux/fscrypt.h |  18 --
>  8 files changed, 40 insertions(+), 175 deletions(-)
> 

This patch changes many different components.  It would be much easier to
review, and might get more attention from the other ext4 and f2fs developers, if
it were split into 3 patches:

a. Convert ext4 to use read_callbacks.
b. Convert f2fs to use read_callbacks.
c. Remove the functions from fs/crypto/ that became unused as a result of
   patches (a) and (b).  (Actually, this part probably should be merged with the
   patch that removes the fscrypt_ctx, and the patch renamed to something like
   "fscrypt: remove decryption I/O path helpers")

Any reason why this wouldn't work?  AFAICS, you couldn't do it only because you
made this patch change fscrypt_enqueue_decrypt_work() to be responsible for
initializing the work function.  But as per my comments on patch 1, I don't
think we should do that, since it would make much more sense to put the work
function in read_callbacks.c.

However, since you're converting ext4 to use mpage_readpages() anyway, I don't
think we should bother with the intermediate change to ext4_mpage_readpages().
It's useless, and that intermediate state of the ext4 code inevitably won't get
tested very well.  So perhaps order the whole series as:

- fs: introduce read_callbacks
- fs/mpage.c: add decryption support via read_callbacks
- fs/buffer.c: add decryption support via read_callbacks
- f2fs: convert to use read_callbacks
- ext4: convert to use mpage_readpages[s]
- ext4: support encryption with subpage-sized blocks
- fscrypt: remove decryption I/O path helpers

That order would also give the flexibility to possibly apply the fs/ changes
first, without having to update both ext4 and f2fs simultaneously with them.

> @@ -557,8 +511,7 @@ static struct bio *f2fs_grab_read_bio(struct inode 
> *inode, block_t blkaddr,
>  {
>   struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
>   struct bio *bio;
> - struct bio_post_read_ctx *ctx;
> - unsigned int post_read_steps = 0;
> + int ret;

Nit: 'err' rather than 'ret', since this is 0 or a -errno value.

> -int __init f2fs_init_post_read_processing(void)
> -{
> - bio_post_read_ctx_cache = KMEM_CACHE(bio_post_read_ctx, 0);
> - if (!bio_post_read_ctx_cache)
> - goto fail;
> - bio_post_read_ctx_pool =
> - mempool_create_slab_pool(NUM_PREALLOC_POST_READ_CTXS,
> -  bio_post_read_ctx_cache);
> - if (!bio_post_read_ctx_pool)
> - goto fail_free_cache;
> - return 0;
> -
> -fail_free_cache:
> - kmem_cache_destroy(bio_post_read_ctx_cache);
> -fail:
> - return -ENOMEM;
> -}
> -
> -void __exit f2fs_destroy_post_read_processing(void)
> -{
> - mempool_destroy(bio_post_read_ctx_pool);
> - kmem_cache_destroy(bio_post_read_ctx_cache);
> -}

Need to remove the declarations of these functions from fs/f2fs/f2fs.h to.

> diff --git a/fs/read_callbacks.c b/fs/read_callbacks.c
> index a4196e3de05f..4b7fc2a349cd 100644
> --- a/fs/read_callbacks.c
> +++ b/fs/read_callbacks.c
> @@ -76,7 +76,6 @@ void read_callbacks(struct read_callbacks_ctx *ctx)
>   switch (++ctx->cur_step) {
>   case STEP_DECRYPT:
>   if (ctx->enabled_steps & (1 << STEP_DECRYPT)) {
> - INIT_WORK(>work, fscrypt_decrypt_work);
>   fscrypt_enqueue_decrypt_work(>work);
>   return;
>   }

Again, I think the work initialization should remain here as:

INIT_WORK(>work, decrypt_work);

rather than moving it to fs/crypto/.

Thanks!

- Eric


___
Linux-f2fs-devel mailing list
Linux-f2fs-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel


Re: [f2fs-dev] [PATCH V3 1/7] FS: Introduce read callbacks

2019-06-21 Thread Eric Biggers
Hi Chandan,

On Sun, Jun 16, 2019 at 09:38:07PM +0530, Chandan Rajendra wrote:
> Read callbacks implements a state machine to be executed after a
> buffered read I/O is completed. They help in further processing the file
> data read from the backing store. Currently, decryption is the only post
> processing step to be supported.
> 
> The execution of the state machine is to be initiated by the endio
> function associated with the read operation.
> 
> Signed-off-by: Chandan Rajendra 
> ---
>  fs/Kconfig |   3 +
>  fs/Makefile|   2 +
>  fs/crypto/Kconfig  |   1 +
>  fs/crypto/bio.c|  11 +++
>  fs/read_callbacks.c| 174 +
>  include/linux/fscrypt.h|   5 +
>  include/linux/read_callbacks.h |  38 +++
>  7 files changed, 234 insertions(+)
>  create mode 100644 fs/read_callbacks.c
>  create mode 100644 include/linux/read_callbacks.h
> 
> diff --git a/fs/Kconfig b/fs/Kconfig
> index f1046cf6ad85..d869859c88da 100644
> --- a/fs/Kconfig
> +++ b/fs/Kconfig
> @@ -21,6 +21,9 @@ if BLOCK
>  config FS_IOMAP
>   bool
>  
> +config FS_READ_CALLBACKS
> +   bool

This should be intended with a tab, not spaces.

> +
>  source "fs/ext2/Kconfig"
>  source "fs/ext4/Kconfig"
>  source "fs/jbd2/Kconfig"
> diff --git a/fs/Makefile b/fs/Makefile
> index c9aea23aba56..a1a06f0db5c1 100644
> --- a/fs/Makefile
> +++ b/fs/Makefile
> @@ -21,6 +21,8 @@ else
>  obj-y += no-block.o
>  endif
>  
> +obj-$(CONFIG_FS_READ_CALLBACKS) += read_callbacks.o
> +
>  obj-$(CONFIG_PROC_FS) += proc_namespace.o

Nit: maybe move this to just below the line for iomap.o, to be consistent with
where FS_READ_CALLBACKS is in the Kconfig file.

>  
>  obj-y+= notify/
> diff --git a/fs/crypto/Kconfig b/fs/crypto/Kconfig
> index 24ed99e2eca0..7752f9964280 100644
> --- a/fs/crypto/Kconfig
> +++ b/fs/crypto/Kconfig
> @@ -9,6 +9,7 @@ config FS_ENCRYPTION
>   select CRYPTO_CTS
>   select CRYPTO_SHA256
>   select KEYS
> + select FS_READ_CALLBACKS if BLOCK
>   help
> Enable encryption of files and directories.  This
> feature is similar to ecryptfs, but it is more memory
> diff --git a/fs/crypto/bio.c b/fs/crypto/bio.c
> index 82da2510721f..f677ff93d464 100644
> --- a/fs/crypto/bio.c
> +++ b/fs/crypto/bio.c
> @@ -24,6 +24,7 @@
>  #include 
>  #include 
>  #include 
> +#include 
>  #include "fscrypt_private.h"
>  
>  static void __fscrypt_decrypt_bio(struct bio *bio, bool done)
> @@ -68,6 +69,16 @@ void fscrypt_enqueue_decrypt_bio(struct fscrypt_ctx *ctx, 
> struct bio *bio)
>  }
>  EXPORT_SYMBOL(fscrypt_enqueue_decrypt_bio);
>  
> +void fscrypt_decrypt_work(struct work_struct *work)
> +{
> + struct read_callbacks_ctx *ctx =
> + container_of(work, struct read_callbacks_ctx, work);
> +
> + fscrypt_decrypt_bio(ctx->bio);
> +
> + read_callbacks(ctx);
> +}
> +

This seems like a layering violation, since it means that read_callbacks.c calls
fs/crypto/ *and* fs/crypto/ calls read_callbacks.c.  I don't think fs/crypto/
should be aware of read_callbacks at all.  Instead we should have a clear
ordering between the components: the filesystem uses read_callbacks.c and
fs/crypto/, and read_callbacks.c uses fs/crypto/.  So how about:

- Move fscrypt_decrypt_work(), fscrypt_decrypt_bh(), and fscrypt_decrypt_bio()
  into fs/read_callbacks.c and remove the "fscrypt_" prefix from them.

- Instead of selecting FS_READ_CALLBACKS from FS_ENCRYPTION, select it from
  EXT4_FS and F2FS_FS (if FS_ENCRYPTION).  I.e., it's really the *filesystems*
  themselves that use read_callbacks, not fs/crypto/.

- Move the definition of read_callbacks_ctx into fs/read_callbacks.c, and make
  read_callbacks() static, so these are private to the read_callbacks component.

>  int fscrypt_zeroout_range(const struct inode *inode, pgoff_t lblk,
>   sector_t pblk, unsigned int len)
>  {
> diff --git a/fs/read_callbacks.c b/fs/read_callbacks.c
> new file mode 100644
> index ..a4196e3de05f
> --- /dev/null
> +++ b/fs/read_callbacks.c
> @@ -0,0 +1,174 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/*
> + * This file tracks the state machine that needs to be executed after reading
> + * data from files that are encrypted and/or have verity metadata associated
> + * with them.
> + */
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +
> +#define NUM_PREALLOC_READ_CALLBACK_CTXS  128
> +
> +static struct kmem_cache *read_callbacks_ctx_cache;
> +static mempool_t *read_callbacks_ctx_pool;
> +
> +/* Read callback state machine steps */
> +enum read_callbacks_step {
> + STEP_INITIAL = 0,
> + STEP_DECRYPT,
> +};
> +
> +static void put_read_callbacks_ctx(struct read_callbacks_ctx *ctx)
> +{
> + mempool_free(ctx, read_callbacks_ctx_pool);
> +}

Maybe call this free_read_callbacks_ctx(), so that it doesn't sound like it's

[f2fs-dev] [PATCH] f2fs: add wsync_mode for sysfs entry

2019-06-21 Thread Jaegeuk Kim
From: Jaegeuk Kim 

This add one sysfs entry to control REQ_SYNC/REQ_BACKGROUND for write bios
for data page writes.

Signed-off-by: Jaegeuk Kim 
---
 Documentation/ABI/testing/sysfs-fs-f2fs |  7 +++
 Documentation/filesystems/f2fs.txt  |  4 
 fs/f2fs/data.c  |  3 +--
 fs/f2fs/f2fs.h  | 12 
 fs/f2fs/sysfs.c |  2 ++
 5 files changed, 26 insertions(+), 2 deletions(-)

diff --git a/Documentation/ABI/testing/sysfs-fs-f2fs 
b/Documentation/ABI/testing/sysfs-fs-f2fs
index dca326e0ee3e..d3eca3eb3214 100644
--- a/Documentation/ABI/testing/sysfs-fs-f2fs
+++ b/Documentation/ABI/testing/sysfs-fs-f2fs
@@ -251,3 +251,10 @@ Description:
If checkpoint=disable, it displays the number of blocks that 
are unusable.
 If checkpoint=enable it displays the enumber of blocks that 
would be unusable
 if checkpoint=disable were to be set.
+
+What:  /sys/fs/f2fs//wsync_mode
+Date   June 2019
+Contact:   "Jaegeuk Kim" 
+Description:
+   0 gives no change. 1 assigns all the data writes with REQ_SYNC.
+2 does REQ_BACKGROUND instead.
diff --git a/Documentation/filesystems/f2fs.txt 
b/Documentation/filesystems/f2fs.txt
index bebd1be3ba49..81c529801a88 100644
--- a/Documentation/filesystems/f2fs.txt
+++ b/Documentation/filesystems/f2fs.txt
@@ -413,6 +413,10 @@ Files in /sys/fs/f2fs/
   that would be unusable if checkpoint=disable were
   to be set.
 
+ wsync_mode   0 is by default. 1 gives REQ_SYNC for all the 
data
+  writes. 2 gives REQ_BACKGROUND for all. This can
+  used for the performance tuning purpose.
+
 

 USAGE
 

diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index f4e1672bd96e..18c73a1fdef3 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -9,7 +9,6 @@
 #include 
 #include 
 #include 
-#include 
 #include 
 #include 
 #include 
@@ -2021,7 +2020,7 @@ static int __write_data_page(struct page *page, bool 
*submitted,
.ino = inode->i_ino,
.type = DATA,
.op = REQ_OP_WRITE,
-   .op_flags = wbc_to_write_flags(wbc),
+   .op_flags = f2fs_wbc_to_write_flags(sbi, wbc),
.old_blkaddr = NULL_ADDR,
.page = page,
.encrypted_page = NULL,
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 2be2b16573c3..1cc46a6dc340 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -12,6 +12,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -1264,6 +1265,7 @@ struct f2fs_sb_info {
 
/* writeback control */
atomic_t wb_sync_req[META]; /* count # of WB_SYNC threads */
+   int wsync_mode; /* write mode */
 
/* valid inode count */
struct percpu_counter total_valid_inode_count;
@@ -3631,6 +3633,16 @@ static inline void set_opt_mode(struct f2fs_sb_info 
*sbi, unsigned int mt)
}
 }
 
+static inline int f2fs_wbc_to_write_flags(struct f2fs_sb_info *sbi,
+   struct writeback_control *wbc)
+{
+   if (sbi->wsync_mode == 1)
+   return REQ_SYNC;
+   if (sbi->wsync_mode == 2)
+   return REQ_BACKGROUND;
+   return wbc_to_write_flags(wbc);
+}
+
 static inline bool f2fs_may_encrypt(struct inode *inode)
 {
 #ifdef CONFIG_FS_ENCRYPTION
diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c
index 3aeacd0aacfd..e3c164d921a1 100644
--- a/fs/f2fs/sysfs.c
+++ b/fs/f2fs/sysfs.c
@@ -455,6 +455,7 @@ F2FS_GENERAL_RO_ATTR(lifetime_write_kbytes);
 F2FS_GENERAL_RO_ATTR(features);
 F2FS_GENERAL_RO_ATTR(current_reserved_blocks);
 F2FS_GENERAL_RO_ATTR(unusable);
+F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, wsync_mode, wsync_mode);
 
 #ifdef CONFIG_FS_ENCRYPTION
 F2FS_FEATURE_RO_ATTR(encryption, FEAT_CRYPTO);
@@ -515,6 +516,7 @@ static struct attribute *f2fs_attrs[] = {
ATTR_LIST(features),
ATTR_LIST(reserved_blocks),
ATTR_LIST(current_reserved_blocks),
+   ATTR_LIST(wsync_mode),
NULL,
 };
 ATTRIBUTE_GROUPS(f2fs);
-- 
2.19.0.605.g01d371f741-goog



___
Linux-f2fs-devel mailing list
Linux-f2fs-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel


Re: [PATCH -next] f2fs: Use div_u64*() for 64-bit divisions

2019-06-21 Thread Jaegeuk Kim
On 06/21, Geert Uytterhoeven wrote:
> Hi Chao,
> 
> On Fri, Jun 21, 2019 at 11:54 AM Chao Yu  wrote:
> > Since the original patch hasn't been merged to upstream, I think we can 
> > merge
> > this into original patch, how do you think?
> 
> Thanks, that's fine for me.

Merged the fix.
Thank you so much.

> 
> > On 2019/6/20 22:38, Geert Uytterhoeven wrote:
> > > On 32-bit (e.g. m68k):
> > >
> > > fs/f2fs/gc.o: In function `f2fs_resize_fs':
> > > gc.c:(.text+0x3056): undefined reference to `__umoddi3'
> > > gc.c:(.text+0x30c4): undefined reference to `__udivdi3'
> > >
> > > Fix this by using div_u64_rem() and div_u64() for 64-by-32 modulo resp.
> > > division operations.
> > >
> > > Reported-by: nore...@ellerman.id.au
> > > Fixes: d2ae7494d043bfaf ("f2fs: ioctl for removing a range from F2FS")
> > > Signed-off-by: Geert Uytterhoeven 
> > > ---
> > > This assumes BLKS_PER_SEC(sbi) is 32-bit.
> > >
> > > #define BLKS_PER_SEC(sbi)   \
> > >   ((sbi)->segs_per_sec * (sbi)->blocks_per_seg)
> > >
> > > Notes:
> > >   1. f2fs_sb_info.segs_per_sec and f2fs_sb_info.blocks_per_seg are both
> > >  unsigned int,
> > >   2. The multiplication is done in 32-bit arithmetic, hence the result
> > >  is of type unsigned int.
> > >   3. Is it guaranteed that the result will always fit in 32-bit, or can
> > >  this overflow?
> > >   4. fs/f2fs/debug.c:update_sit_info() assigns BLKS_PER_SEC(sbi) to
> > >  unsigned long long blks_per_sec, anticipating a 64-bit value.
> > > ---
> > >  fs/f2fs/gc.c | 6 --
> > >  1 file changed, 4 insertions(+), 2 deletions(-)
> > >
> > > diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
> > > index 5b1076505ade9f84..c65f87f11de029f4 100644
> > > --- a/fs/f2fs/gc.c
> > > +++ b/fs/f2fs/gc.c
> > > @@ -1438,13 +1438,15 @@ int f2fs_resize_fs(struct f2fs_sb_info *sbi, 
> > > __u64 block_count)
> > >   unsigned int secs;
> > >   int gc_mode, gc_type;
> > >   int err = 0;
> > > + __u32 rem;
> > >
> > >   old_block_count = le64_to_cpu(F2FS_RAW_SUPER(sbi)->block_count);
> > >   if (block_count > old_block_count)
> > >   return -EINVAL;
> > >
> > >   /* new fs size should align to section size */
> > > - if (block_count % BLKS_PER_SEC(sbi))
> > > + div_u64_rem(block_count, BLKS_PER_SEC(sbi), );
> > > + if (rem)
> > >   return -EINVAL;
> > >
> > >   if (block_count == old_block_count)
> > > @@ -1463,7 +1465,7 @@ int f2fs_resize_fs(struct f2fs_sb_info *sbi, __u64 
> > > block_count)
> > >   freeze_bdev(sbi->sb->s_bdev);
> > >
> > >   shrunk_blocks = old_block_count - block_count;
> > > - secs = shrunk_blocks / BLKS_PER_SEC(sbi);
> > > + secs = div_u64(shrunk_blocks, BLKS_PER_SEC(sbi));
> > >   spin_lock(>stat_lock);
> > >   if (shrunk_blocks + valid_user_blocks(sbi) +
> > >   sbi->current_reserved_blocks + sbi->unusable_block_count +
> 
> Gr{oetje,eeting}s,
> 
> Geert
> 
> -- 
> Geert Uytterhoeven -- There's lots of Linux beyond ia32 -- 
> ge...@linux-m68k.org
> 
> In personal conversations with technical people, I call myself a hacker. But
> when I'm talking to journalists I just say "programmer" or something like 
> that.
> -- Linus Torvalds


Re: [f2fs-dev] [PATCH v3] f2fs: add a rw_sem to cover quota flag changes

2019-06-21 Thread Jaegeuk Kim
On 06/20, Chao Yu wrote:
> On 2019/6/20 1:26, Jaegeuk Kim wrote:
> > On 06/18, Chao Yu wrote:
> >> On 2019/6/14 10:46, Jaegeuk Kim wrote:
> >>> On 06/11, Chao Yu wrote:
>  On 2019/6/5 2:36, Jaegeuk Kim wrote:
> > Two paths to update quota and f2fs_lock_op:
> >
> > 1.
> >  - lock_op
> >  |  - quota_update
> >  `- unlock_op
> >
> > 2.
> >  - quota_update
> >  - lock_op
> >  `- unlock_op
> >
> > But, we need to make a transaction on quota_update + lock_op in #2 case.
> > So, this patch introduces:
> > 1. lock_op
> > 2. down_write
> > 3. check __need_flush
> > 4. up_write
> > 5. if there is dirty quota entries, flush them
> > 6. otherwise, good to go
> >
> > Signed-off-by: Jaegeuk Kim 
> > ---
> >
> > v3 from v2:
> >  - refactor to fix quota corruption issue
> >   : it seems that the previous scenario is not real and no deadlock 
> > case was
> > encountered.
> 
>  - f2fs_dquot_commit
>   - down_read(>quota_sem)
>   - block_operation
>    - f2fs_lock_all
> - need_flush_quota
>  - down_write(>quota_sem)
>    - f2fs_quota_write
> - f2fs_lock_op
> 
>  Why can't this happen?
> 
>  Once more question, should we hold quota_sem during checkpoint to avoid 
>  further
>  quota update? f2fs_lock_op can do this job as well?
> >>>
> >>> I couldn't find write_dquot() call to make this happen, and f2fs_lock_op 
> >>> was not
> >>
> >> - f2fs_dquot_commit
> >>  - dquot_commit
> >>   ->commit_dqblk (v2_write_dquot)
> >>- qtree_write_dquot
> >> ->quota_write (f2fs_quota_write)
> >>  - f2fs_lock_op
> >>
> >> Do you mean there is no such way that calling f2fs_lock_op() from
> >> f2fs_quota_write()? So that deadlock condition is not existing?
> > 
> > I mean write_dquot->f2fs_dquot_commit and block_operation seems not racing
> > together.
> 
> quota ioctl has the path calling write_dquot->f2fs_dquot_commit as below, 
> which
> can race with checkpoint().
> 
> - do_quotactl
>  - sb->s_qcop->quota_sync (f2fs_quota_sync)
>   - down_read(>quota_sem);    First
>- dquot_writeback_dquots
> - sb->dq_op->write_dquot (f2fs_dquot_commit)
>   - block_operation can 
> race here
>  - down_read(>quota_sem);     Second

Adding f2fs_lock_op() in f2fs_quota_sync() should be fine?

> 
> Thanks,
> 
> > 
> >>
> >> Thanks,
> >>
> >>> enough to cover quota updates. Current stress & power-cut tests are 
> >>> running for
> >>> several days without problem with this patch.
> >>>
> 
>  Thanks,
> 
> >
> >  fs/f2fs/checkpoint.c | 41 +++--
> >  fs/f2fs/f2fs.h   |  1 +
> >  fs/f2fs/super.c  | 26 +-
> >  3 files changed, 41 insertions(+), 27 deletions(-)
> >
> > diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
> > index 89825261d474..43f65f0962e5 100644
> > --- a/fs/f2fs/checkpoint.c
> > +++ b/fs/f2fs/checkpoint.c
> > @@ -1131,17 +1131,24 @@ static void __prepare_cp_block(struct 
> > f2fs_sb_info *sbi)
> >  
> >  static bool __need_flush_quota(struct f2fs_sb_info *sbi)
> >  {
> > +   bool ret = false;
> > +
> > if (!is_journalled_quota(sbi))
> > return false;
> > -   if (is_sbi_flag_set(sbi, SBI_QUOTA_SKIP_FLUSH))
> > -   return false;
> > -   if (is_sbi_flag_set(sbi, SBI_QUOTA_NEED_REPAIR))
> > -   return false;
> > -   if (is_sbi_flag_set(sbi, SBI_QUOTA_NEED_FLUSH))
> > -   return true;
> > -   if (get_pages(sbi, F2FS_DIRTY_QDATA))
> > -   return true;
> > -   return false;
> > +
> > +   down_write(>quota_sem);
> > +   if (is_sbi_flag_set(sbi, SBI_QUOTA_SKIP_FLUSH)) {
> > +   ret = false;
> > +   } else if (is_sbi_flag_set(sbi, SBI_QUOTA_NEED_REPAIR)) {
> > +   ret = false;
> > +   } else if (is_sbi_flag_set(sbi, SBI_QUOTA_NEED_FLUSH)) {
> > +   clear_sbi_flag(sbi, SBI_QUOTA_NEED_FLUSH);
> > +   ret = true;
> > +   } else if (get_pages(sbi, F2FS_DIRTY_QDATA)) {
> > +   ret = true;
> > +   }
> > +   up_write(>quota_sem);
> > +   return ret;
> >  }
> >  
> >  /*
> > @@ -1160,26 +1167,22 @@ static int block_operations(struct f2fs_sb_info 
> > *sbi)
> > blk_start_plug();
> >  
> >  retry_flush_quotas:
> > +   f2fs_lock_all(sbi);
> > if (__need_flush_quota(sbi)) {
> > int locked;
> >  
> > if (++cnt > 

Re: [PATCH 1/2] f2fs: use generic EFSBADCRC/EFSCORRUPTED

2019-06-21 Thread Pavel Machek
On Thu 2019-06-20 11:36:14, Chao Yu wrote:
> f2fs uses EFAULT as error number to indicate filesystem is corrupted
> all the time, but generic filesystems use EUCLEAN for such condition,
> we need to change to follow others.
> 
> This patch adds two new macros as below to wrap more generic error
> code macros, and spread them in code.
> 
> EFSBADCRC EBADMSG /* Bad CRC detected */
> EFSCORRUPTED  EUCLEAN /* Filesystem is corrupted */
> 
> Reported-by: Pavel Machek 
> Signed-off-by: Chao Yu 

Acked-by: Pavel Machek 

-- 
(english) http://www.livejournal.com/~pavelmachek
(cesky, pictures) 
http://atrey.karlin.mff.cuni.cz/~pavel/picture/horses/blog.html


signature.asc
Description: Digital signature


Re: [PATCH 2/2] f2fs: set SBI_NEED_FSCK for xattr corruption case

2019-06-21 Thread Pavel Machek
On Thu 2019-06-20 11:36:15, Chao Yu wrote:
> If xattr is corrupted, let's print kernel message and set SBI_NEED_FSCK
> for further repair.
> 
> Reported-by: Pavel Machek 
> Signed-off-by: Chao Yu 

Acked-by: Pavel Machek 

-- 
(english) http://www.livejournal.com/~pavelmachek
(cesky, pictures) 
http://atrey.karlin.mff.cuni.cz/~pavel/picture/horses/blog.html


signature.asc
Description: Digital signature


Re: [PATCH] fsf2: Use DIV_ROUND_UP() instead of open-coding

2019-06-21 Thread Chao Yu
fsf2: Use DIV_ROUND_UP() instead of open-coding

fsf2 -> f2fs

Otherwise, it looks good to me.

Reviewed-by: Chao Yu 

Thanks,

On 2019/6/20 22:42, Geert Uytterhoeven wrote:
> Replace the open-coded divisions with round-up by calls to the
> DIV_ROUND_UP() helper macro.
> 
> Signed-off-by: Geert Uytterhoeven 
> ---
>  fs/f2fs/f2fs.h| 4 ++--
>  fs/f2fs/file.c| 6 +++---
>  fs/f2fs/segment.h | 2 +-
>  3 files changed, 6 insertions(+), 6 deletions(-)
> 
> diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
> index 9afe15675dbbd369..52f477eaaee93bc3 100644
> --- a/fs/f2fs/f2fs.h
> +++ b/fs/f2fs/f2fs.h
> @@ -480,8 +480,8 @@ static inline int get_inline_xattr_addrs(struct inode 
> *inode);
>  #define NR_INLINE_DENTRY(inode)  (MAX_INLINE_DATA(inode) * BITS_PER_BYTE 
> / \
>   ((SIZE_OF_DIR_ENTRY + F2FS_SLOT_LEN) * \
>   BITS_PER_BYTE + 1))
> -#define INLINE_DENTRY_BITMAP_SIZE(inode) ((NR_INLINE_DENTRY(inode) + \
> - BITS_PER_BYTE - 1) / BITS_PER_BYTE)
> +#define INLINE_DENTRY_BITMAP_SIZE(inode) \
> + DIV_ROUND_UP(NR_INLINE_DENTRY(inode), BITS_PER_BYTE)
>  #define INLINE_RESERVED_SIZE(inode)  (MAX_INLINE_DATA(inode) - \
>   ((SIZE_OF_DIR_ENTRY + F2FS_SLOT_LEN) * \
>   NR_INLINE_DENTRY(inode) + \
> diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
> index 1180eca879331eba..fc00d8bdc31c18b0 100644
> --- a/fs/f2fs/file.c
> +++ b/fs/f2fs/file.c
> @@ -1211,7 +1211,7 @@ static int __exchange_data_block(struct inode 
> *src_inode,
>  static int f2fs_do_collapse(struct inode *inode, loff_t offset, loff_t len)
>  {
>   struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
> - pgoff_t nrpages = (i_size_read(inode) + PAGE_SIZE - 1) / PAGE_SIZE;
> + pgoff_t nrpages = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE);
>   pgoff_t start = offset >> PAGE_SHIFT;
>   pgoff_t end = (offset + len) >> PAGE_SHIFT;
>   int ret;
> @@ -1464,7 +1464,7 @@ static int f2fs_insert_range(struct inode *inode, 
> loff_t offset, loff_t len)
>   pg_start = offset >> PAGE_SHIFT;
>   pg_end = (offset + len) >> PAGE_SHIFT;
>   delta = pg_end - pg_start;
> - idx = (i_size_read(inode) + PAGE_SIZE - 1) / PAGE_SIZE;
> + idx = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE);
>  
>   /* avoid gc operation during block exchange */
>   down_write(_I(inode)->i_gc_rwsem[WRITE]);
> @@ -2362,7 +2362,7 @@ static int f2fs_defragment_range(struct f2fs_sb_info 
> *sbi,
>   if (!fragmented)
>   goto out;
>  
> - sec_num = (total + BLKS_PER_SEC(sbi) - 1) / BLKS_PER_SEC(sbi);
> + sec_num = DIV_ROUND_UP(total, BLKS_PER_SEC(sbi));
>  
>   /*
>* make sure there are enough free section for LFS allocation, this can
> diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h
> index 166ac0f07a4e472d..2ae6df03b9982d12 100644
> --- a/fs/f2fs/segment.h
> +++ b/fs/f2fs/segment.h
> @@ -109,7 +109,7 @@
>  #define  START_SEGNO(segno)  \
>   (SIT_BLOCK_OFFSET(segno) * SIT_ENTRY_PER_BLOCK)
>  #define SIT_BLK_CNT(sbi) \
> - ((MAIN_SEGS(sbi) + SIT_ENTRY_PER_BLOCK - 1) / SIT_ENTRY_PER_BLOCK)
> + DIV_ROUND_UP(MAIN_SEGS(sbi), SIT_ENTRY_PER_BLOCK)
>  #define f2fs_bitmap_size(nr) \
>   (BITS_TO_LONGS(nr) * sizeof(unsigned long))
>  
> 


Re: [f2fs-dev] [PATCH -next] f2fs: Use div_u64*() for 64-bit divisions

2019-06-21 Thread Geert Uytterhoeven
Hi Chao,

On Fri, Jun 21, 2019 at 11:54 AM Chao Yu  wrote:
> Since the original patch hasn't been merged to upstream, I think we can merge
> this into original patch, how do you think?

Thanks, that's fine for me.

> On 2019/6/20 22:38, Geert Uytterhoeven wrote:
> > On 32-bit (e.g. m68k):
> >
> > fs/f2fs/gc.o: In function `f2fs_resize_fs':
> > gc.c:(.text+0x3056): undefined reference to `__umoddi3'
> > gc.c:(.text+0x30c4): undefined reference to `__udivdi3'
> >
> > Fix this by using div_u64_rem() and div_u64() for 64-by-32 modulo resp.
> > division operations.
> >
> > Reported-by: nore...@ellerman.id.au
> > Fixes: d2ae7494d043bfaf ("f2fs: ioctl for removing a range from F2FS")
> > Signed-off-by: Geert Uytterhoeven 
> > ---
> > This assumes BLKS_PER_SEC(sbi) is 32-bit.
> >
> > #define BLKS_PER_SEC(sbi)   \
> >   ((sbi)->segs_per_sec * (sbi)->blocks_per_seg)
> >
> > Notes:
> >   1. f2fs_sb_info.segs_per_sec and f2fs_sb_info.blocks_per_seg are both
> >  unsigned int,
> >   2. The multiplication is done in 32-bit arithmetic, hence the result
> >  is of type unsigned int.
> >   3. Is it guaranteed that the result will always fit in 32-bit, or can
> >  this overflow?
> >   4. fs/f2fs/debug.c:update_sit_info() assigns BLKS_PER_SEC(sbi) to
> >  unsigned long long blks_per_sec, anticipating a 64-bit value.
> > ---
> >  fs/f2fs/gc.c | 6 --
> >  1 file changed, 4 insertions(+), 2 deletions(-)
> >
> > diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
> > index 5b1076505ade9f84..c65f87f11de029f4 100644
> > --- a/fs/f2fs/gc.c
> > +++ b/fs/f2fs/gc.c
> > @@ -1438,13 +1438,15 @@ int f2fs_resize_fs(struct f2fs_sb_info *sbi, __u64 
> > block_count)
> >   unsigned int secs;
> >   int gc_mode, gc_type;
> >   int err = 0;
> > + __u32 rem;
> >
> >   old_block_count = le64_to_cpu(F2FS_RAW_SUPER(sbi)->block_count);
> >   if (block_count > old_block_count)
> >   return -EINVAL;
> >
> >   /* new fs size should align to section size */
> > - if (block_count % BLKS_PER_SEC(sbi))
> > + div_u64_rem(block_count, BLKS_PER_SEC(sbi), );
> > + if (rem)
> >   return -EINVAL;
> >
> >   if (block_count == old_block_count)
> > @@ -1463,7 +1465,7 @@ int f2fs_resize_fs(struct f2fs_sb_info *sbi, __u64 
> > block_count)
> >   freeze_bdev(sbi->sb->s_bdev);
> >
> >   shrunk_blocks = old_block_count - block_count;
> > - secs = shrunk_blocks / BLKS_PER_SEC(sbi);
> > + secs = div_u64(shrunk_blocks, BLKS_PER_SEC(sbi));
> >   spin_lock(>stat_lock);
> >   if (shrunk_blocks + valid_user_blocks(sbi) +
> >   sbi->current_reserved_blocks + sbi->unusable_block_count +

Gr{oetje,eeting}s,

Geert

-- 
Geert Uytterhoeven -- There's lots of Linux beyond ia32 -- ge...@linux-m68k.org

In personal conversations with technical people, I call myself a hacker. But
when I'm talking to journalists I just say "programmer" or something like that.
-- Linus Torvalds


___
Linux-f2fs-devel mailing list
Linux-f2fs-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel


Re: [f2fs-dev] [PATCH -next] f2fs: Use div_u64*() for 64-bit divisions

2019-06-21 Thread Chao Yu
Hi Geert,

Since the original patch hasn't been merged to upstream, I think we can merge
this into original patch, how do you think?

On 2019/6/20 22:38, Geert Uytterhoeven wrote:
> On 32-bit (e.g. m68k):
> 
> fs/f2fs/gc.o: In function `f2fs_resize_fs':
> gc.c:(.text+0x3056): undefined reference to `__umoddi3'
> gc.c:(.text+0x30c4): undefined reference to `__udivdi3'
> 
> Fix this by using div_u64_rem() and div_u64() for 64-by-32 modulo resp.
> division operations.
> 
> Reported-by: nore...@ellerman.id.au
> Fixes: d2ae7494d043bfaf ("f2fs: ioctl for removing a range from F2FS")
> Signed-off-by: Geert Uytterhoeven 
> ---
> This assumes BLKS_PER_SEC(sbi) is 32-bit.
> 
> #define BLKS_PER_SEC(sbi)   \
>   ((sbi)->segs_per_sec * (sbi)->blocks_per_seg)
> 
> Notes:
>   1. f2fs_sb_info.segs_per_sec and f2fs_sb_info.blocks_per_seg are both
>  unsigned int,
>   2. The multiplication is done in 32-bit arithmetic, hence the result
>  is of type unsigned int.
>   3. Is it guaranteed that the result will always fit in 32-bit, or can
>  this overflow?
>   4. fs/f2fs/debug.c:update_sit_info() assigns BLKS_PER_SEC(sbi) to
>  unsigned long long blks_per_sec, anticipating a 64-bit value.
> ---
>  fs/f2fs/gc.c | 6 --
>  1 file changed, 4 insertions(+), 2 deletions(-)
> 
> diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
> index 5b1076505ade9f84..c65f87f11de029f4 100644
> --- a/fs/f2fs/gc.c
> +++ b/fs/f2fs/gc.c
> @@ -1438,13 +1438,15 @@ int f2fs_resize_fs(struct f2fs_sb_info *sbi, __u64 
> block_count)
>   unsigned int secs;
>   int gc_mode, gc_type;
>   int err = 0;
> + __u32 rem;
>  
>   old_block_count = le64_to_cpu(F2FS_RAW_SUPER(sbi)->block_count);
>   if (block_count > old_block_count)
>   return -EINVAL;
>  
>   /* new fs size should align to section size */
> - if (block_count % BLKS_PER_SEC(sbi))
> + div_u64_rem(block_count, BLKS_PER_SEC(sbi), );
> + if (rem)
>   return -EINVAL;
>  
>   if (block_count == old_block_count)
> @@ -1463,7 +1465,7 @@ int f2fs_resize_fs(struct f2fs_sb_info *sbi, __u64 
> block_count)
>   freeze_bdev(sbi->sb->s_bdev);
>  
>   shrunk_blocks = old_block_count - block_count;
> - secs = shrunk_blocks / BLKS_PER_SEC(sbi);
> + secs = div_u64(shrunk_blocks, BLKS_PER_SEC(sbi));
>   spin_lock(>stat_lock);
>   if (shrunk_blocks + valid_user_blocks(sbi) +
>   sbi->current_reserved_blocks + sbi->unusable_block_count +
> 


___
Linux-f2fs-devel mailing list
Linux-f2fs-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel