On Jun 13, 2017, at 11:15 AM, Jens Axboe wrote:
>
> Add four flags for the pwritev2(2) system call, allowing an application
> to give the kernel a hint about what on-media life times can be
> expected from a given write.
>
> The intent is for these values to be relative to each other, no
> absolute meaning should be attached to these flag names.
>
> Define IOCB flags to carry this information over, and finally
> transform them into the block defined stream values.
>
> Signed-off-by: Jens Axboe
> ---
>  fs/read_write.c         | 17 ++++++++++++++++-
>  include/linux/fs.h      | 18 ++++++++++++++++++
>  include/uapi/linux/fs.h |  4 ++++
> 3 files changed, 38 insertions(+), 1 deletion(-)
>
> diff --git a/fs/read_write.c b/fs/read_write.c
> index 47c1d4484df9..1734728aa48b 100644
> --- a/fs/read_write.c
> +++ b/fs/read_write.c
> @@ -678,7 +678,9 @@ static ssize_t do_iter_readv_writev(struct file *filp,
> struct iov_iter *iter,
> struct kiocb kiocb;
> ssize_t ret;
>
> - if (flags & ~(RWF_HIPRI | RWF_DSYNC | RWF_SYNC))
> + if (flags & ~(RWF_HIPRI | RWF_DSYNC | RWF_SYNC | RWF_WRITE_LIFE_SHORT |
> + RWF_WRITE_LIFE_MEDIUM | RWF_WRITE_LIFE_LONG |
> + RWF_WRITE_LIFE_EXTREME))
> return -EOPNOTSUPP;
Since this is essentially a 4-bit mask, which also works fine if the stream
ID is an arbitrary value, it would be clearer to define a single mask such as
RWF_WRITE_LIFE_MASK that covers these bits instead of spelling out the
individual bits.
>
> init_sync_kiocb(&kiocb, filp);
> @@ -688,6 +690,19 @@ static ssize_t do_iter_readv_writev(struct file *filp,
> struct iov_iter *iter,
> kiocb.ki_flags |= IOCB_DSYNC;
> if (flags & RWF_SYNC)
> kiocb.ki_flags |= (IOCB_DSYNC | IOCB_SYNC);
> + if (flags & RWF_WRITE_LIFE_SHORT) {
> + kiocb.ki_flags |= IOCB_WRITE_LIFE_SHORT;
> + file_inode(filp)->i_stream = WRITE_LIFE_SHORT;
> + } else if (flags & RWF_WRITE_LIFE_MEDIUM) {
> + kiocb.ki_flags |= IOCB_WRITE_LIFE_MEDIUM;
> + file_inode(filp)->i_stream = WRITE_LIFE_MEDIUM;
> + } else if (flags & RWF_WRITE_LIFE_LONG) {
> + kiocb.ki_flags |= IOCB_WRITE_LIFE_LONG;
> + file_inode(filp)->i_stream = WRITE_LIFE_LONG;
> + } else if (flags & RWF_WRITE_LIFE_EXTREME) {
> + kiocb.ki_flags |= IOCB_WRITE_LIFE_EXTREME;
> + file_inode(filp)->i_stream = WRITE_LIFE_EXTREME;
> + }
This should just pass all of the bits through:
	if (flags & RWF_WRITE_LIFE_MASK) {
		file_inode(filp)->i_stream =
			(flags & RWF_WRITE_LIFE_MASK) >> RWF_WRITE_LIFE_SHIFT;
		kiocb.ki_flags |=
			file_inode(filp)->i_stream << IOCB_WRITE_LIFE_SHIFT;
	}
> diff --git a/include/linux/fs.h b/include/linux/fs.h
> index bb8c246eeda8..7f542e0b0e17 100644
> --- a/include/linux/fs.h
> +++ b/include/linux/fs.h
> @@ -269,6 +269,10 @@ struct writeback_control;
> #define IOCB_DSYNC (1 << 4)
> #define IOCB_SYNC (1 << 5)
> #define IOCB_WRITE (1 << 6)
> +#define IOCB_WRITE_LIFE_SHORT (1 << 7)
> +#define IOCB_WRITE_LIFE_MEDIUM (1 << 8)
> +#define IOCB_WRITE_LIFE_LONG (1 << 9)
> +#define IOCB_WRITE_LIFE_EXTREME (1 << 10)
#define IOCB_WRITE_LIFE_SHIFT 7
#define IOCB_WRITE_LIFE_MASK (0xf << IOCB_WRITE_LIFE_SHIFT)
or if you want to make it clear which bits are actually used, either:
#define IOCB_WRITE_LIFE_MASK ((1 << 7) | (1 << 8) | (1 << 9) | (1 << 10))
or
#define IOCB_WRITE_LIFE_MASK (BIT(7) | BIT(8) | BIT(9) | BIT(10))
> @@ -293,6 +297,20 @@ static inline void init_sync_kiocb(struct kiocb *kiocb,
> struct file *filp)
> };
> }
>
> +static inline int iocb_streamid(const struct kiocb *iocb)
> +{
> + if (iocb->ki_flags & IOCB_WRITE_LIFE_SHORT)
> + return WRITE_LIFE_SHORT;
> + else if (iocb->ki_flags & IOCB_WRITE_LIFE_MEDIUM)
> + return WRITE_LIFE_MEDIUM;
> + else if (iocb->ki_flags & IOCB_WRITE_LIFE_LONG)
> + return WRITE_LIFE_LONG;
> + else if (iocb->ki_flags & IOCB_WRITE_LIFE_EXTREME)
> + return WRITE_LIFE_EXTREME;
> +
> + return WRITE_LIFE_UNKNOWN;
> +}
> +
> /*
> * "descriptor" for what we're up to with a read.
> * This allows us to use the same read code yet
> diff --git a/include/uapi/linux/fs.h b/include/uapi/linux/fs.h
> index 24e61a54feaa..34145b29657e 100644
> --- a/include/uapi/linux/fs.h
> +++ b/include/uapi/linux/fs.h
> @@ -360,5 +360,9 @@ struct fscrypt_key {
> #define RWF_HIPRI 0x0001 /* high priority request,
> poll if possible */
> #define RWF_DSYNC 0x0002 /* per-IO O_DSYNC */
> #define RWF_SYNC 0x0004 /* per-IO O_SYNC */
> +#define RWF_WRITE_LIFE_SHORT 0x0008 /* short life time write */
> +#define RWF_WRITE_LIFE_MEDIUM0x0