Extend statx system call to return additional info for buffered atomic
write support for a file. Currently only direct IO is supported.

New flags STATX_WRITE_ATOMIC_BUF and STATX_ATTR_WRITE_ATOMIC_BUF are for
indicating whether the file knows and supports buffered atomic writes.

Structure statx members stx_atomic_write_unit_{min, max} and
stx_atomic_write_segments_max will be reused for buffered atomic writes.
Flags STATX_WRITE_ATOMIC_DIO and STATX_WRITE_ATOMIC_BUF are mutually
exclusive. With both flags set, statx will ignore the request and
neither flag will be set in statx.result_mask.

Also, make sure ext4 and xfs report atomic write unit min and max of 0
when the new flag is passed.

Co-developed-by: John Garry <[email protected]>
Signed-off-by: John Garry <[email protected]>
Signed-off-by: Ojaswin Mujoo <[email protected]>
---
 block/bdev.c                                  |   3 +-
 fs/ext4/inode.c                               |   7 +-
 fs/stat.c                                     |  33 +++--
 fs/xfs/xfs_file.c                             |   9 +-
 fs/xfs/xfs_iops.c                             | 121 ++++++++++--------
 fs/xfs/xfs_iops.h                             |   6 +-
 include/linux/fs.h                            |   3 +-
 include/trace/misc/fs.h                       |   1 +
 include/uapi/linux/stat.h                     |   2 +
 tools/include/uapi/linux/stat.h               |   2 +
 .../trace/beauty/include/uapi/linux/stat.h    |   2 +
 11 files changed, 119 insertions(+), 70 deletions(-)

diff --git a/block/bdev.c b/block/bdev.c
index 3bc90d5feb4c..8f0eab0a1ecf 100644
--- a/block/bdev.c
+++ b/block/bdev.c
@@ -1335,8 +1335,7 @@ void bdev_statx(const struct path *path, struct kstat 
*stat, u32 request_mask)
 
                generic_fill_statx_atomic_writes(stat,
                        queue_atomic_write_unit_min_bytes(bd_queue),
-                       queue_atomic_write_unit_max_bytes(bd_queue),
-                       0);
+                       queue_atomic_write_unit_max_bytes(bd_queue), 0, true);
        }
 
        stat->blksize = bdev_io_min(bdev);
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 9555149a8ba6..0d5013993fba 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -6106,8 +6106,11 @@ int ext4_getattr(struct mnt_idmap *idmap, const struct 
path *path,
                        awu_max = sbi->s_awu_max;
                }
 
-               generic_fill_statx_atomic_writes(stat, awu_min, awu_max, 0);
-       }
+               generic_fill_statx_atomic_writes(stat, awu_min, awu_max, 0,
+                                                true);
+       } else if (request_mask & STATX_WRITE_ATOMIC_BUF)
+               /* Atomic writes for buffered IO not supported yet */
+               generic_fill_statx_atomic_writes(stat, 0, 0, 0, false);
 
        flags = ei->i_flags & EXT4_FL_USER_VISIBLE;
        if (flags & EXT4_APPEND_FL)
diff --git a/fs/stat.c b/fs/stat.c
index 7eb2a247ab67..8ba3993dcd09 100644
--- a/fs/stat.c
+++ b/fs/stat.c
@@ -137,20 +137,27 @@ EXPORT_SYMBOL(generic_fill_statx_attr);
  * @unit_min:  Minimum supported atomic write length in bytes
  * @unit_max:  Maximum supported atomic write length in bytes
  * @unit_max_opt: Optimised maximum supported atomic write length in bytes
+ * @is_dio:    Is the stat request for dio
  *
- * Fill in the STATX{_ATTR}_WRITE_ATOMIC_DIO flags in the kstat structure from
- * atomic write unit_min and unit_max values.
+ * Fill in the STATX{_ATTR}_WRITE_ATOMIC_{DIO,BUF} flags in the kstat structure
+ * from atomic write unit_min and unit_max values.
  */
 void generic_fill_statx_atomic_writes(struct kstat *stat,
                                      unsigned int unit_min,
                                      unsigned int unit_max,
-                                     unsigned int unit_max_opt)
+                                     unsigned int unit_max_opt,
+                                     bool is_dio)
 {
-       /* Confirm that the request type is known */
-       stat->result_mask |= STATX_WRITE_ATOMIC_DIO;
+       if (is_dio) {
+               /* Confirm that the request type is known */
+               stat->result_mask |= STATX_WRITE_ATOMIC_DIO;
 
-       /* Confirm that the file attribute type is known */
-       stat->attributes_mask |= STATX_ATTR_WRITE_ATOMIC_DIO;
+               /* Confirm that the file attribute type is known */
+               stat->attributes_mask |= STATX_ATTR_WRITE_ATOMIC_DIO;
+       } else {
+               stat->result_mask |= STATX_WRITE_ATOMIC_BUF;
+               stat->attributes_mask |= STATX_ATTR_WRITE_ATOMIC_BUF;
+       }
 
        if (unit_min) {
                stat->atomic_write_unit_min = unit_min;
@@ -160,7 +167,10 @@ void generic_fill_statx_atomic_writes(struct kstat *stat,
                stat->atomic_write_segments_max = 1;
 
                /* Confirm atomic writes are actually supported */
-               stat->attributes |= STATX_ATTR_WRITE_ATOMIC_DIO;
+               if (is_dio)
+                       stat->attributes |= STATX_ATTR_WRITE_ATOMIC_DIO;
+               else
+                       stat->attributes |= STATX_ATTR_WRITE_ATOMIC_BUF;
        }
 }
 EXPORT_SYMBOL_GPL(generic_fill_statx_atomic_writes);
@@ -206,6 +216,13 @@ int vfs_getattr_nosec(const struct path *path, struct 
kstat *stat,
        stat->attributes_mask |= (STATX_ATTR_AUTOMOUNT |
                                  STATX_ATTR_DAX);
 
+       if (request_mask & STATX_WRITE_ATOMIC_BUF &&
+           request_mask & STATX_WRITE_ATOMIC_DIO) {
+               /* Both are mutually exclusive, disable them */
+               request_mask &=
+                       ~(STATX_WRITE_ATOMIC_BUF | STATX_WRITE_ATOMIC_DIO);
+       }
+
        idmap = mnt_idmap(path->mnt);
        if (inode->i_op->getattr) {
                int ret;
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index 5b9864c8582e..3efa575570ed 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -1087,6 +1087,7 @@ xfs_file_write_iter(
        struct xfs_inode        *ip = XFS_I(inode);
        ssize_t                 ret;
        size_t                  ocount = iov_iter_count(from);
+       bool is_dio = iocb->ki_flags & IOCB_DIRECT;
 
        XFS_STATS_INC(ip->i_mount, xs_write_calls);
 
@@ -1097,10 +1098,10 @@ xfs_file_write_iter(
                return -EIO;
 
        if (iocb->ki_flags & IOCB_ATOMIC) {
-               if (ocount < xfs_get_atomic_write_min(ip))
+               if (ocount < xfs_get_atomic_write_min(ip, is_dio))
                        return -EINVAL;
 
-               if (ocount > xfs_get_atomic_write_max(ip))
+               if (ocount > xfs_get_atomic_write_max(ip, is_dio))
                        return -EINVAL;
 
                ret = generic_atomic_write_valid(iocb, from);
@@ -1111,7 +1112,7 @@ xfs_file_write_iter(
        if (IS_DAX(inode))
                return xfs_file_dax_write(iocb, from);
 
-       if (iocb->ki_flags & IOCB_DIRECT) {
+       if (is_dio) {
                /*
                 * Allow a directio write to fall back to a buffered
                 * write *only* in the case that we're doing a reflink
@@ -1568,7 +1569,7 @@ xfs_file_open(
        if (xfs_is_shutdown(XFS_M(inode->i_sb)))
                return -EIO;
        file->f_mode |= FMODE_NOWAIT | FMODE_CAN_ODIRECT;
-       if (xfs_get_atomic_write_min(XFS_I(inode)) > 0)
+       if (xfs_get_atomic_write_min(XFS_I(inode), file->f_flags & O_DIRECT) > 
0)
                file->f_mode |= FMODE_CAN_ATOMIC_WRITE;
        return generic_file_open(inode, file);
 }
diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c
index f41fcdd3043b..f036c46b19c5 100644
--- a/fs/xfs/xfs_iops.c
+++ b/fs/xfs/xfs_iops.c
@@ -601,81 +601,99 @@ xfs_report_dioalign(
 
 unsigned int
 xfs_get_atomic_write_min(
-       struct xfs_inode        *ip)
+       struct xfs_inode        *ip,
+       bool                    is_dio)
 {
-       struct xfs_mount        *mp = ip->i_mount;
+       if (is_dio) {
+               struct xfs_mount *mp = ip->i_mount;
 
-       /*
-        * If we can complete an atomic write via atomic out of place writes,
-        * then advertise a minimum size of one fsblock.  Without this
-        * mechanism, we can only guarantee atomic writes up to a single LBA.
-        *
-        * If out of place writes are not available, we can guarantee an atomic
-        * write of exactly one single fsblock if the bdev will make that
-        * guarantee for us.
-        */
-       if (xfs_inode_can_hw_atomic_write(ip) ||
-           xfs_inode_can_sw_atomic_write(ip))
-               return mp->m_sb.sb_blocksize;
+               /*
+                * If we can complete an atomic write via atomic out of place 
writes,
+                * then advertise a minimum size of one fsblock.  Without this
+                * mechanism, we can only guarantee atomic writes up to a 
single LBA.
+                *
+                * If out of place writes are not available, we can guarantee 
an atomic
+                * write of exactly one single fsblock if the bdev will make 
that
+                * guarantee for us.
+                */
+               if (xfs_inode_can_hw_atomic_write(ip) ||
+                   xfs_inode_can_sw_atomic_write(ip))
+                       return mp->m_sb.sb_blocksize;
+       }
 
+       /* buffered IO not supported yet so return 0 right away */
        return 0;
 }
 
 unsigned int
 xfs_get_atomic_write_max(
-       struct xfs_inode        *ip)
+       struct xfs_inode        *ip,
+       bool                    is_dio)
 {
        struct xfs_mount        *mp = ip->i_mount;
 
-       /*
-        * If out of place writes are not available, we can guarantee an atomic
-        * write of exactly one single fsblock if the bdev will make that
-        * guarantee for us.
-        */
-       if (!xfs_inode_can_sw_atomic_write(ip)) {
-               if (xfs_inode_can_hw_atomic_write(ip))
-                       return mp->m_sb.sb_blocksize;
-               return 0;
+       if (is_dio) {
+               /*
+                * If out of place writes are not available, we can guarantee 
an atomic
+                * write of exactly one single fsblock if the bdev will make 
that
+                * guarantee for us.
+                */
+               if (!xfs_inode_can_sw_atomic_write(ip)) {
+                       if (xfs_inode_can_hw_atomic_write(ip))
+                               return mp->m_sb.sb_blocksize;
+                       return 0;
+               }
+
+               /*
+                * If we can complete an atomic write via atomic out of place 
writes,
+                * then advertise a maximum size of whatever we can complete 
through
+                * that means.  Hardware support is reported via max_opt, not 
here.
+                */
+               if (XFS_IS_REALTIME_INODE(ip))
+                       return XFS_FSB_TO_B(mp, 
mp->m_groups[XG_TYPE_RTG].awu_max);
+               return XFS_FSB_TO_B(mp, mp->m_groups[XG_TYPE_AG].awu_max);
        }
 
-       /*
-        * If we can complete an atomic write via atomic out of place writes,
-        * then advertise a maximum size of whatever we can complete through
-        * that means.  Hardware support is reported via max_opt, not here.
-        */
-       if (XFS_IS_REALTIME_INODE(ip))
-               return XFS_FSB_TO_B(mp, mp->m_groups[XG_TYPE_RTG].awu_max);
-       return XFS_FSB_TO_B(mp, mp->m_groups[XG_TYPE_AG].awu_max);
+       /* buffered IO not supported yet so return 0 right away */
+       return 0;
 }
 
 unsigned int
 xfs_get_atomic_write_max_opt(
-       struct xfs_inode        *ip)
+       struct xfs_inode        *ip,
+       bool                    is_dio)
 {
-       unsigned int            awu_max = xfs_get_atomic_write_max(ip);
+       if (is_dio) {
+               unsigned int awu_max = xfs_get_atomic_write_max(ip, is_dio);
 
-       /* if the max is 1x block, then just keep behaviour that opt is 0 */
-       if (awu_max <= ip->i_mount->m_sb.sb_blocksize)
-               return 0;
+               /* if the max is 1x block, then just keep behaviour that opt is 
0 */
+               if (awu_max <= ip->i_mount->m_sb.sb_blocksize)
+                       return 0;
 
-       /*
-        * Advertise the maximum size of an atomic write that we can tell the
-        * block device to perform for us.  In general the bdev limit will be
-        * less than our out of place write limit, but we don't want to exceed
-        * the awu_max.
-        */
-       return min(awu_max, xfs_inode_buftarg(ip)->bt_awu_max);
+               /*
+                * Advertise the maximum size of an atomic write that we can 
tell the
+                * block device to perform for us.  In general the bdev limit 
will be
+                * less than our out of place write limit, but we don't want to 
exceed
+                * the awu_max.
+                */
+               return min(awu_max, xfs_inode_buftarg(ip)->bt_awu_max);
+       }
+
+       /* buffered IO not supported yet so return 0 right away */
+       return 0;
 }
 
 static void
 xfs_report_atomic_write(
        struct xfs_inode        *ip,
-       struct kstat            *stat)
+       struct kstat            *stat,
+       bool                    is_dio)
 {
        generic_fill_statx_atomic_writes(stat,
-                       xfs_get_atomic_write_min(ip),
-                       xfs_get_atomic_write_max(ip),
-                       xfs_get_atomic_write_max_opt(ip));
+                                        xfs_get_atomic_write_min(ip, is_dio),
+                                        xfs_get_atomic_write_max(ip, is_dio),
+                                        xfs_get_atomic_write_max_opt(ip, 
is_dio),
+                                        is_dio);
 }
 
 STATIC int
@@ -741,8 +759,11 @@ xfs_vn_getattr(
        case S_IFREG:
                if (request_mask & (STATX_DIOALIGN | STATX_DIO_READ_ALIGN))
                        xfs_report_dioalign(ip, stat);
-               if (request_mask & STATX_WRITE_ATOMIC_DIO)
-                       xfs_report_atomic_write(ip, stat);
+               if (request_mask &
+                   (STATX_WRITE_ATOMIC_DIO | STATX_WRITE_ATOMIC_BUF))
+                       xfs_report_atomic_write(ip, stat,
+                                               (request_mask &
+                                                STATX_WRITE_ATOMIC_DIO));
                fallthrough;
        default:
                stat->blksize = xfs_stat_blksize(ip);
diff --git a/fs/xfs/xfs_iops.h b/fs/xfs/xfs_iops.h
index 0896f6b8b3b8..09e79263add1 100644
--- a/fs/xfs/xfs_iops.h
+++ b/fs/xfs/xfs_iops.h
@@ -19,8 +19,8 @@ int xfs_inode_init_security(struct inode *inode, struct inode 
*dir,
 extern void xfs_setup_inode(struct xfs_inode *ip);
 extern void xfs_setup_iops(struct xfs_inode *ip);
 extern void xfs_diflags_to_iflags(struct xfs_inode *ip, bool init);
-unsigned int xfs_get_atomic_write_min(struct xfs_inode *ip);
-unsigned int xfs_get_atomic_write_max(struct xfs_inode *ip);
-unsigned int xfs_get_atomic_write_max_opt(struct xfs_inode *ip);
+unsigned int xfs_get_atomic_write_min(struct xfs_inode *ip, bool is_dio);
+unsigned int xfs_get_atomic_write_max(struct xfs_inode *ip, bool is_dio);
+unsigned int xfs_get_atomic_write_max_opt(struct xfs_inode *ip, bool is_dio);
 
 #endif /* __XFS_IOPS_H__ */
diff --git a/include/linux/fs.h b/include/linux/fs.h
index c895146c1444..2dec66913e97 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -3563,7 +3563,8 @@ void generic_fill_statx_attr(struct inode *inode, struct 
kstat *stat);
 void generic_fill_statx_atomic_writes(struct kstat *stat,
                                      unsigned int unit_min,
                                      unsigned int unit_max,
-                                     unsigned int unit_max_opt);
+                                     unsigned int unit_max_opt,
+                                     bool is_dio);
 extern int vfs_getattr_nosec(const struct path *, struct kstat *, u32, 
unsigned int);
 extern int vfs_getattr(const struct path *, struct kstat *, u32, unsigned int);
 void __inode_add_bytes(struct inode *inode, loff_t bytes);
diff --git a/include/trace/misc/fs.h b/include/trace/misc/fs.h
index 19ea9339b9bd..3b69910a5998 100644
--- a/include/trace/misc/fs.h
+++ b/include/trace/misc/fs.h
@@ -162,4 +162,5 @@
                { STATX_MNT_ID_UNIQUE,  "MNT_ID_UNIQUE" },      \
                { STATX_SUBVOL,         "SUBVOL" },             \
                { STATX_WRITE_ATOMIC_DIO,       "WRITE_ATOMIC_DIO" },   \
+               { STATX_WRITE_ATOMIC_BUF,       "WRITE_ATOMIC_BUF" },   \
                { STATX_DIO_READ_ALIGN, "DIO_READ_ALIGN" })
diff --git a/include/uapi/linux/stat.h b/include/uapi/linux/stat.h
index 57f558be933e..2d77da04df23 100644
--- a/include/uapi/linux/stat.h
+++ b/include/uapi/linux/stat.h
@@ -221,6 +221,7 @@ struct statx {
 /* Old name kept for backward compatibility */
 #define STATX_WRITE_ATOMIC     STATX_WRITE_ATOMIC_DIO
 #define STATX_DIO_READ_ALIGN   0x00020000U     /* Want/got dio read alignment 
info */
+#define STATX_WRITE_ATOMIC_BUF 0x00040000U     /* Want/got buf-io 
atomic_write_* fields */
 
 #define STATX__RESERVED                0x80000000U     /* Reserved for future 
struct statx expansion */
 
@@ -259,6 +260,7 @@ struct statx {
 #define STATX_ATTR_WRITE_ATOMIC_DIO    0x00400000 /* File supports dio atomic 
write operations */
 /* Old name kept for backward compatibility */
 #define STATX_ATTR_WRITE_ATOMIC        STATX_ATTR_WRITE_ATOMIC_DIO
+#define STATX_ATTR_WRITE_ATOMIC_BUF    0x00800000 /* File supports buf-io 
atomic write operations */
 
 
 #endif /* _UAPI_LINUX_STAT_H */
diff --git a/tools/include/uapi/linux/stat.h b/tools/include/uapi/linux/stat.h
index 57f558be933e..a7e0036669c2 100644
--- a/tools/include/uapi/linux/stat.h
+++ b/tools/include/uapi/linux/stat.h
@@ -221,6 +221,7 @@ struct statx {
 /* Old name kept for backward compatibility */
 #define STATX_WRITE_ATOMIC     STATX_WRITE_ATOMIC_DIO
 #define STATX_DIO_READ_ALIGN   0x00020000U     /* Want/got dio read alignment 
info */
+#define STATX_WRITE_ATOMIC_BUF  0x00040000U    /* Want/got buf-io 
atomic_write_* fields */
 
 #define STATX__RESERVED                0x80000000U     /* Reserved for future 
struct statx expansion */
 
@@ -259,6 +260,7 @@ struct statx {
 #define STATX_ATTR_WRITE_ATOMIC_DIO    0x00400000 /* File supports dio atomic 
write operations */
 /* Old name kept for backward compatibility */
 #define STATX_ATTR_WRITE_ATOMIC        STATX_ATTR_WRITE_ATOMIC_DIO
+#define STATX_ATTR_WRITE_ATOMIC_BUF    0x00800000 /* File supports buf-io 
atomic write operations */
 
 
 #endif /* _UAPI_LINUX_STAT_H */
diff --git a/tools/perf/trace/beauty/include/uapi/linux/stat.h 
b/tools/perf/trace/beauty/include/uapi/linux/stat.h
index 57f558be933e..2d77da04df23 100644
--- a/tools/perf/trace/beauty/include/uapi/linux/stat.h
+++ b/tools/perf/trace/beauty/include/uapi/linux/stat.h
@@ -221,6 +221,7 @@ struct statx {
 /* Old name kept for backward compatibility */
 #define STATX_WRITE_ATOMIC     STATX_WRITE_ATOMIC_DIO
 #define STATX_DIO_READ_ALIGN   0x00020000U     /* Want/got dio read alignment 
info */
+#define STATX_WRITE_ATOMIC_BUF 0x00040000U     /* Want/got buf-io 
atomic_write_* fields */
 
 #define STATX__RESERVED                0x80000000U     /* Reserved for future 
struct statx expansion */
 
@@ -259,6 +260,7 @@ struct statx {
 #define STATX_ATTR_WRITE_ATOMIC_DIO    0x00400000 /* File supports dio atomic 
write operations */
 /* Old name kept for backward compatibility */
 #define STATX_ATTR_WRITE_ATOMIC        STATX_ATTR_WRITE_ATOMIC_DIO
+#define STATX_ATTR_WRITE_ATOMIC_BUF    0x00800000 /* File supports buf-io 
atomic write operations */
 
 
 #endif /* _UAPI_LINUX_STAT_H */
-- 
2.51.0


Reply via email to