Partial-block writes to a physical block device (as opposed to writes into a hole of a regular file) can trigger unnecessary read-modify-write cycles. In particular, the redundant reads can add noticeable overhead on cloud disks.
Write full blocks with pwritev(2) instead. Signed-off-by: Gao Xiang <[email protected]> --- configure.ac | 2 ++ include/erofs/io.h | 5 +++++ lib/inode.c | 33 +++++++++++++++------------------ lib/io.c | 33 +++++++++++++++++++++++++++++++++ 4 files changed, 55 insertions(+), 18 deletions(-) diff --git a/configure.ac b/configure.ac index 88f1cbe..a73a9ba 100644 --- a/configure.ac +++ b/configure.ac @@ -216,6 +216,7 @@ AC_CHECK_HEADERS(m4_flatten([ sys/statfs.h sys/sysmacros.h sys/time.h + sys/uio.h unistd.h ])) @@ -274,6 +275,7 @@ AC_CHECK_FUNCS(m4_flatten([ ftello64 pread64 pwrite64 + pwritev posix_fadvise fstatfs sendfile diff --git a/include/erofs/io.h b/include/erofs/io.h index 3179ea1..101a5ba 100644 --- a/include/erofs/io.h +++ b/include/erofs/io.h @@ -16,6 +16,7 @@ extern "C" #define _GNU_SOURCE #endif #include <unistd.h> +#include <sys/uio.h> #include "defs.h" #ifndef O_BINARY @@ -27,6 +28,8 @@ struct erofs_vfile; struct erofs_vfops { ssize_t (*pread)(struct erofs_vfile *vf, void *buf, u64 offset, size_t len); ssize_t (*pwrite)(struct erofs_vfile *vf, const void *buf, u64 offset, size_t len); + ssize_t (*pwritev)(struct erofs_vfile *vf, const struct iovec *iov, + int iovcnt, u64 pos); int (*fsync)(struct erofs_vfile *vf); int (*fallocate)(struct erofs_vfile *vf, u64 offset, size_t len, bool pad); int (*ftruncate)(struct erofs_vfile *vf, u64 length); @@ -53,6 +56,8 @@ ssize_t __erofs_io_write(int fd, const void *buf, size_t len); int erofs_io_fstat(struct erofs_vfile *vf, struct stat *buf); ssize_t erofs_io_pwrite(struct erofs_vfile *vf, const void *buf, u64 pos, size_t len); +ssize_t erofs_io_pwritev(struct erofs_vfile *vf, const struct iovec *iov, + int iovcnt, u64 pos); int erofs_io_fsync(struct erofs_vfile *vf); ssize_t erofs_io_fallocate(struct erofs_vfile *vf, u64 offset, size_t len, bool pad); int erofs_io_ftruncate(struct erofs_vfile *vf, u64 length); diff --git a/lib/inode.c b/lib/inode.c index a36ade2..09f519b 100644 --- a/lib/inode.c +++ 
b/lib/inode.c @@ -827,6 +827,7 @@ static struct erofs_bhops erofs_write_inline_bhops = { static int erofs_write_tail_end(struct erofs_inode *inode) { + static const u8 zeroed[EROFS_MAX_BLOCK_SIZE]; struct erofs_sb_info *sbi = inode->sbi; struct erofs_buffer_head *bh, *ibh; @@ -843,8 +844,10 @@ static int erofs_write_tail_end(struct erofs_inode *inode) ibh->fsprivate = erofs_igrab(inode); ibh->op = &erofs_write_inline_bhops; } else { + struct iovec iov[2]; + erofs_off_t pos; int ret; - erofs_off_t pos, zero_pos; + bool h0; if (!bh) { bh = erofs_balloc(sbi->bmgr, @@ -874,25 +877,19 @@ static int erofs_write_tail_end(struct erofs_inode *inode) pos = erofs_btell(bh, true) - erofs_blksiz(sbi); /* 0'ed data should be padded at head for 0padding conversion */ - if (erofs_sb_has_lz4_0padding(sbi) && inode->compressed_idata) { - zero_pos = pos; - pos += erofs_blksiz(sbi) - inode->idata_size; - } else { - /* pad 0'ed data for the other cases */ - zero_pos = pos + inode->idata_size; - } - ret = erofs_dev_write(sbi, inode->idata, pos, inode->idata_size); - if (ret) + h0 = erofs_sb_has_lz4_0padding(sbi) && inode->compressed_idata; + DBG_BUGON(inode->idata_size > erofs_blksiz(sbi)); + + iov[h0] = (struct iovec) { .iov_base = inode->idata, + .iov_len = inode->idata_size }; + iov[!h0] = (struct iovec) { .iov_base = (u8 *)zeroed, + erofs_blksiz(sbi) - inode->idata_size }; + ret = erofs_io_pwritev(&sbi->bdev, iov, 2, pos); + if (ret < 0) return ret; + else if (ret < erofs_blksiz(sbi)) + return -EIO; - DBG_BUGON(inode->idata_size > erofs_blksiz(sbi)); - if (inode->idata_size < erofs_blksiz(sbi)) { - ret = erofs_dev_fillzero(sbi, zero_pos, - erofs_blksiz(sbi) - inode->idata_size, - false); - if (ret) - return ret; - } inode->idata_size = 0; free(inode->idata); inode->idata = NULL; diff --git a/lib/io.c b/lib/io.c index 5c3d263..aa043ca 100644 --- a/lib/io.c +++ b/lib/io.c @@ -96,6 +96,39 @@ ssize_t erofs_io_pwrite(struct erofs_vfile *vf, const void *buf, return written; } +ssize_t 
erofs_io_pwritev(struct erofs_vfile *vf, const struct iovec *iov,
+			 int iovcnt, u64 pos)
+{
+	ssize_t ret, written;
+	int i;
+
+	if (__erofs_unlikely(cfg.c_dry_run))
+		return 0;	/* dry run: report 0 bytes without touching vf */
+
+#ifdef HAVE_PWRITEV
+	if (!vf->ops) {
+		ret = pwritev(vf->fd, iov, iovcnt, pos + vf->offset);
+		if (ret < 0)
+			return -errno;
+		return ret;	/* may be short; callers must check the count */
+	}
+#endif
+	if (vf->ops && vf->ops->pwritev)
+		return vf->ops->pwritev(vf, iov, iovcnt, pos);
+	written = 0;	/* fallback: emulate with one pwrite per segment */
+	for (i = 0; i < iovcnt; ++i) {
+		ret = erofs_io_pwrite(vf, iov[i].iov_base, pos, iov[i].iov_len);
+		/* test the sign first: ssize_t < size_t compares unsigned */
+		if (ret < 0)
+			return ret;
+		if ((size_t)ret < iov[i].iov_len)
+			return written + ret;
+		written += iov[i].iov_len;
+		pos += iov[i].iov_len;
+	}
+	return written;
+}
+
 int erofs_io_fsync(struct erofs_vfile *vf)
 {
 	int ret;
-- 
2.43.5
