On 11/29/18 3:55 PM, Jens Axboe wrote:
> The bio referencing has a trick that doesn't do any actual atomic
> inc/dec on the reference count until we have to elevator to > 1. For the
> async IO O_DIRECT case, we can't use the simple DIO variants, so we use
> __blkdev_direct_IO(). It always grabs an extra reference to the bio
> after allocation, which means we then enter the slower path of actually
> having to do atomic_inc/dec on the count.
> 
> We don't need to do that for the async case, unless we end up going
> multi-bio, in which case we're already doing huge amounts of IO. For the
> smaller IO case (< BIO_MAX_PAGES), we can do without the extra ref.

I accidentally generated this against the aio-poll branch, here's a
version that is not.


diff --git a/fs/block_dev.c b/fs/block_dev.c
index d233a59ea364..5c0a224bf9cb 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -281,10 +281,25 @@ struct blkdev_dio {
 
 static struct bio_set blkdev_dio_pool;
 
+static void blkdev_bio_dirty_release(struct bio *bio, bool should_dirty)
+{
+       if (should_dirty) {
+               bio_check_pages_dirty(bio);
+       } else {
+               struct bio_vec *bvec;
+               int i;
+
+               bio_for_each_segment_all(bvec, bio, i)
+                       put_page(bvec->bv_page);
+               bio_put(bio);
+       }
+}
+
 static void blkdev_bio_end_io(struct bio *bio)
 {
        struct blkdev_dio *dio = bio->bi_private;
        bool should_dirty = dio->should_dirty;
+       bool should_free = false;
 
        if (dio->multi_bio && !atomic_dec_and_test(&dio->ref)) {
                if (bio->bi_status && !dio->bio.bi_status)
@@ -302,25 +317,20 @@ static void blkdev_bio_end_io(struct bio *bio)
                        }
 
                        dio->iocb->ki_complete(iocb, ret, 0);
-                       bio_put(&dio->bio);
                } else {
                        struct task_struct *waiter = dio->waiter;
 
                        WRITE_ONCE(dio->waiter, NULL);
                        blk_wake_io_task(waiter);
                }
+               /* last bio is done, now we can free the parent holding dio */
+               should_free = dio->multi_bio;
        }
 
-       if (should_dirty) {
-               bio_check_pages_dirty(bio);
-       } else {
-               struct bio_vec *bvec;
-               int i;
-
-               bio_for_each_segment_all(bvec, bio, i)
-                       put_page(bvec->bv_page);
-               bio_put(bio);
-       }
+       if (!dio->is_sync)
+               blkdev_bio_dirty_release(bio, should_dirty);
+       if (should_free)
+               bio_put(&dio->bio);
 }
 
 static ssize_t
@@ -343,14 +353,15 @@ __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter 
*iter, int nr_pages)
                return -EINVAL;
 
        bio = bio_alloc_bioset(GFP_KERNEL, nr_pages, &blkdev_dio_pool);
-       bio_get(bio); /* extra ref for the completion handler */
 
        dio = container_of(bio, struct blkdev_dio, bio);
        dio->is_sync = is_sync = is_sync_kiocb(iocb);
-       if (dio->is_sync)
+       if (dio->is_sync) {
                dio->waiter = current;
-       else
+               bio_get(bio);
+       } else {
                dio->iocb = iocb;
+       }
 
        dio->size = 0;
        dio->multi_bio = false;
@@ -400,6 +411,13 @@ __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter 
*iter, int nr_pages)
                }
 
                if (!dio->multi_bio) {
+                       /*
+                        * async needs an extra reference if we are goign
+                        * multi-bio only, so we can ensure that 'dio'
+                        * sticks around.
+                        */
+                       if (!is_sync)
+                               bio_get(bio);
                        dio->multi_bio = true;
                        atomic_set(&dio->ref, 2);
                } else {
@@ -433,6 +451,7 @@ __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter 
*iter, int nr_pages)
        if (likely(!ret))
                ret = dio->size;
 
+       blkdev_bio_dirty_release(bio, dio->should_dirty);
        bio_put(&dio->bio);
        return ret;
 }

-- 
Jens Axboe

Reply via email to