From: Keith Busch <[email protected]> The blk-mq based drivers have every incoming bio validated by an unconditional __bio_split_to_limits() call, which fails the bio with BLK_STS_INVAL if any segment does not meet the queue's dma_alignment, so they only see viable requests. A bio-based driver, though, receives a bio whose memory alignment has not been checked.
Misalignment is possible for vectors supplied from user-space direct I/O. When a stacking driver forwards a misaligned bio to a member device, that member may reject it with BLK_STS_INVAL if the lower level attempts to split the bio to the queue limits. The stacker tends to mishandle the error: dm-raid1, for example, may degrade an otherwise healthy array. Alternatively, some lower-level bio-based block drivers never attempt to split their bio and assume the one received is viable. If that bio is unaligned, block devices like brd and pmem may corrupt their data, as they have a strong dependency on sector-size-aligned bvecs. Validate the source against the device's dma_alignment where the bio is built from the iov_iter, rejecting misaligned I/O with -EINVAL before it is submitted. This is done opportunistically in a path that already pins the pages, so no additional I/O vector walking is needed. The required alignment is supplied by the callers as vec_align_mask (bdev_dma_alignment()); passthrough and the bounce path pass 0 as they have no such requirement. If a vector is misaligned while building the bio, any pages already pinned into that bio are released before returning. Cc: [email protected] Fixes: 5ff3f74e145a ("block: simplify direct io validity check") Fixes: 7eac33186957 ("iomap: simplify direct io validity check") Signed-off-by: Keith Busch <[email protected]> --- block/bio.c | 19 ++++++++++++++++--- block/blk-map.c | 2 +- block/fops.c | 3 ++- fs/iomap/direct-io.c | 3 ++- include/linux/bio.h | 2 +- include/linux/uio.h | 3 ++- lib/iov_iter.c | 9 ++++++++- 7 files changed, 32 insertions(+), 9 deletions(-) diff --git a/block/bio.c b/block/bio.c index f2a5f4d0a9672..1bd7da889e069 100644 --- a/block/bio.c +++ b/block/bio.c @@ -1242,7 +1242,7 @@ static int bio_iov_iter_align_down(struct bio *bio, struct iov_iter *iter, * is returned only if 0 pages could be pinned. 
*/ int bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter, - unsigned len_align_mask) + unsigned len_align_mask, unsigned vec_align_mask) { iov_iter_extraction_t flags = 0; @@ -1251,6 +1251,11 @@ int bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter, if (iov_iter_is_bvec(iter)) { bio_iov_bvec_set(bio, iter); + + if (mp_bvec_iter_offset(bio->bi_io_vec, bio->bi_iter) & + vec_align_mask) + return -EINVAL; + iov_iter_advance(iter, bio->bi_iter.bi_size); return 0; } @@ -1265,8 +1270,16 @@ int bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter, ret = iov_iter_extract_bvecs(iter, bio->bi_io_vec, BIO_MAX_SIZE - bio->bi_iter.bi_size, - &bio->bi_vcnt, bio->bi_max_vecs, flags); + &bio->bi_vcnt, bio->bi_max_vecs, + vec_align_mask, flags); if (ret <= 0) { + if (ret == -EINVAL) { + bio_release_pages(bio, false); + bio_clear_flag(bio, BIO_PAGE_PINNED); + bio->bi_iter.bi_size = 0; + bio->bi_vcnt = 0; + return ret; + } if (!bio->bi_vcnt) return ret; break; @@ -1377,7 +1390,7 @@ static int bio_iov_iter_bounce_read(struct bio *bio, struct iov_iter *iter, ssize_t ret; ret = iov_iter_extract_bvecs(iter, bio->bi_io_vec + 1, len, - &bio->bi_vcnt, bio->bi_max_vecs - 1, 0); + &bio->bi_vcnt, bio->bi_max_vecs - 1, 0, 0); if (ret <= 0) { if (!bio->bi_vcnt) { folio_put(folio); diff --git a/block/blk-map.c b/block/blk-map.c index 768549f19f97e..c9535efe1a913 100644 --- a/block/blk-map.c +++ b/block/blk-map.c @@ -274,7 +274,7 @@ static int bio_map_user_iov(struct request *rq, struct iov_iter *iter, * No alignment requirements on our part to support arbitrary * passthrough commands. 
*/ - ret = bio_iov_iter_get_pages(bio, iter, 0); + ret = bio_iov_iter_get_pages(bio, iter, 0, 0); if (ret) goto out_put; ret = blk_rq_append_bio(rq, bio); diff --git a/block/fops.c b/block/fops.c index 15783a6180dec..928ba9be170cd 100644 --- a/block/fops.c +++ b/block/fops.c @@ -47,7 +47,8 @@ static inline int blkdev_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter, struct block_device *bdev) { return bio_iov_iter_get_pages(bio, iter, - bdev_logical_block_size(bdev) - 1); + bdev_logical_block_size(bdev) - 1, + bdev_dma_alignment(bdev)); } #define DIO_INLINE_BIO_VECS 4 diff --git a/fs/iomap/direct-io.c b/fs/iomap/direct-io.c index b485e3b191daf..645a4e9cd25f9 100644 --- a/fs/iomap/direct-io.c +++ b/fs/iomap/direct-io.c @@ -358,7 +358,8 @@ static ssize_t iomap_dio_bio_iter_one(struct iomap_iter *iter, iomap_max_bio_size(&iter->iomap), alignment); else ret = bio_iov_iter_get_pages(bio, dio->submit.iter, - alignment - 1); + alignment - 1, + bdev_dma_alignment(bio->bi_bdev)); if (unlikely(ret)) goto out_put_bio; ret = bio->bi_iter.bi_size; diff --git a/include/linux/bio.h b/include/linux/bio.h index 8f33f717b14f5..13be7edb524fc 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -477,7 +477,7 @@ int bdev_rw_virt(struct block_device *bdev, sector_t sector, void *data, size_t len, enum req_op op); int bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter, - unsigned len_align_mask); + unsigned len_align_mask, unsigned vec_align_mask); void bio_iov_bvec_set(struct bio *bio, const struct iov_iter *iter); void __bio_release_pages(struct bio *bio, bool mark_dirty); diff --git a/include/linux/uio.h b/include/linux/uio.h index a9bc5b3067e32..be8b2625b376a 100644 --- a/include/linux/uio.h +++ b/include/linux/uio.h @@ -391,7 +391,8 @@ ssize_t iov_iter_extract_pages(struct iov_iter *i, struct page ***pages, size_t *offset0); ssize_t iov_iter_extract_bvecs(struct iov_iter *iter, struct bio_vec *bv, size_t max_size, unsigned short *nr_vecs, - unsigned 
short max_vecs, iov_iter_extraction_t extraction_flags); + unsigned short max_vecs, unsigned align_mask, + iov_iter_extraction_t extraction_flags); /** * iov_iter_extract_will_pin - Indicate how pages from the iterator will be retained diff --git a/lib/iov_iter.c b/lib/iov_iter.c index 273919b161617..ccd5b49f6b78d 100644 --- a/lib/iov_iter.c +++ b/lib/iov_iter.c @@ -1886,6 +1886,8 @@ static unsigned int get_contig_folio_len(struct page **pages, * @max_size: maximum size to extract from @iter * @nr_vecs: number of vectors in @bv (on in and output) * @max_vecs: maximum vectors in @bv, including those filled before calling + * @align_mask: reject with -EINVAL if the source address or length is not + * aligned to this mask * @extraction_flags: flags to qualify request * * Like iov_iter_extract_pages(), but returns physically contiguous ranges @@ -1897,14 +1899,19 @@ static unsigned int get_contig_folio_len(struct page **pages, */ ssize_t iov_iter_extract_bvecs(struct iov_iter *iter, struct bio_vec *bv, size_t max_size, unsigned short *nr_vecs, - unsigned short max_vecs, iov_iter_extraction_t extraction_flags) + unsigned short max_vecs, unsigned align_mask, + iov_iter_extraction_t extraction_flags) { + unsigned long start = (unsigned long)iter_iov_addr(iter); unsigned short entries_left = max_vecs - *nr_vecs; unsigned short nr_pages, i = 0; size_t left, offset, len; struct page **pages; ssize_t size; + if ((start | iter_iov_len(iter)) & align_mask) + return -EINVAL; + /* * Move page array up in the allocated memory for the bio vecs as far as * possible so that we can start filling biovecs from the beginning -- 2.52.0
