Hello,

On Thu, May 17, 2012 at 10:59:57PM -0400, [email protected] wrote:
> From: Kent Overstreet <[email protected]>
> 
> Break out bio_split() and bio_pair_split(), and get rid of the
> limitations of bio_split()

Again, what are those limitations being removed and why and at what
cost?

> diff --git a/fs/bio.c b/fs/bio.c
> index 3332800..781a8cf 100644
> --- a/fs/bio.c
> +++ b/fs/bio.c
> @@ -35,7 +35,7 @@
>   */
>  #define BIO_INLINE_VECS              4
>  
> -static mempool_t *bio_split_pool __read_mostly;
> +static struct bio_set *bio_split_pool __read_mostly;
>  
>  /*
>   * if you change this list, also change bvec_alloc or things will
> @@ -1464,84 +1464,124 @@ void bio_endio(struct bio *bio, int error)
>  }
>  EXPORT_SYMBOL(bio_endio);
>  

Please add /** comment documenting the function.

> -void bio_pair_release(struct bio_pair *bp)
> +struct bio *bio_split(struct bio *bio, int sectors,
> +                   gfp_t gfp, struct bio_set *bs)
>  {
> -     if (atomic_dec_and_test(&bp->cnt)) {
> -             struct bio *master = bp->bio1.bi_private;
> +     unsigned idx, vcnt = 0, nbytes = sectors << 9;
> +     struct bio_vec *bv;
> +     struct bio *ret = NULL;

Maybe naming it @split is better?

> +
> +     BUG_ON(sectors <= 0);
> +
> +     if (current->bio_list)
> +             gfp &= ~__GFP_WAIT;

Please explain what this means.

> +     if (nbytes >= bio->bi_size)
> +             return bio;
> +
> +     trace_block_split(bdev_get_queue(bio->bi_bdev), bio,
> +                             bio->bi_sector + sectors);
> +
> +     bio_for_each_segment(bv, bio, idx) {
> +             vcnt = idx - bio->bi_idx;
> +
> +             if (!nbytes) {
> +                     ret = bio_alloc_bioset(gfp, 0, bs);
> +                     if (!ret)
> +                             return NULL;
> +
> +                     ret->bi_io_vec = bio_iovec(bio);
> +                     ret->bi_flags |= 1 << BIO_CLONED;
> +                     break;
> +             } else if (nbytes < bv->bv_len) {
> +                     ret = bio_alloc_bioset(gfp, ++vcnt, bs);
> +                     if (!ret)
> +                             return NULL;
> +
> +                     memcpy(ret->bi_io_vec, bio_iovec(bio),
> +                            sizeof(struct bio_vec) * vcnt);
> +
> +                     ret->bi_io_vec[vcnt - 1].bv_len = nbytes;
> +                     bv->bv_offset   += nbytes;
> +                     bv->bv_len      -= nbytes;

Please don't indent assignments.

> +                     break;
> +             }
> +
> +             nbytes -= bv->bv_len;
> +     }

I find the code a bit confusing.  Wouldn't it be better to structure
it as

        bio_for_each_segment() {
                find splitting point;
        }

        Do all of the splitting.

> +     ret->bi_bdev    = bio->bi_bdev;
> +     ret->bi_sector  = bio->bi_sector;
> +     ret->bi_size    = sectors << 9;
> +     ret->bi_rw      = bio->bi_rw;
> +     ret->bi_vcnt    = vcnt;
> +     ret->bi_max_vecs = vcnt;
> +     ret->bi_end_io  = bio->bi_end_io;
> +     ret->bi_private = bio->bi_private;
>  
> -             bio_endio(master, bp->error);
> -             mempool_free(bp, bp->bio2.bi_private);
> +     bio->bi_sector  += sectors;
> +     bio->bi_size    -= sectors << 9;
> +     bio->bi_idx      = idx;

I personally would prefer not having indentations here either.

> +     if (bio_integrity(bio)) {
> +             bio_integrity_clone(ret, bio, gfp, bs);
> +             bio_integrity_trim(ret, 0, bio_sectors(ret));
> +             bio_integrity_trim(bio, bio_sectors(ret), bio_sectors(bio));
>       }
> +
> +     return ret;
>  }
> -EXPORT_SYMBOL(bio_pair_release);
> +EXPORT_SYMBOL_GPL(bio_split);

/** comment would be nice.

> -static void bio_pair_end_1(struct bio *bi, int err)
> +void bio_pair_release(struct bio_pair *bp)
>  {
> -     struct bio_pair *bp = container_of(bi, struct bio_pair, bio1);
> -
> -     if (err)
> -             bp->error = err;
> +     if (atomic_dec_and_test(&bp->cnt)) {
> +             bp->orig->bi_end_io = bp->bi_end_io;
> +             bp->orig->bi_private = bp->bi_private;

So, before, split wouldn't override orig->bi_private.  Now, it does so
while the bio is in flight.  I don't know.  If the only benefit of
temporary override is avoiding have a separate end_io call, I think
not doing so is better.  Also, behavior changes as subtle as this
*must* be noted in the patch description.

> -     bio_pair_release(bp);
> +             bio_endio(bp->orig, 0);
> +             bio_put(&bp->split);
> +     }
>  }
> +EXPORT_SYMBOL(bio_pair_release);

And it would be super-nice if you can add /** comment here too.

> -/*
> - * split a bio - only worry about a bio with a single page in its iovec
> - */
> -struct bio_pair *bio_split(struct bio *bi, int first_sectors)
> +struct bio_pair *bio_pair_split(struct bio *bio, int first_sectors)
>  {
> -     struct bio_pair *bp = mempool_alloc(bio_split_pool, GFP_NOIO);
> -
> -     if (!bp)
> -             return bp;
> +     struct bio_pair *bp;
> +     struct bio *split = bio_split(bio, first_sectors,
> +                                   GFP_NOIO, bio_split_pool);

I know fs/bio.c isn't a bastion of good style but let's try improve it
bit by bit.  It's generally considered a bad style to put a statement
which may fail in local var declaration.  IOW, please do,

        struct bio *split;

        split = bio_split();
        if (!split)
                return NULL;

> @@ -1694,8 +1734,7 @@ static int __init init_bio(void)
>       if (bioset_integrity_create(fs_bio_set, BIO_POOL_SIZE))
>               panic("bio: can't create integrity pool\n");
>  
> -     bio_split_pool = mempool_create_kmalloc_pool(BIO_SPLIT_ENTRIES,
> -                                                  sizeof(struct bio_pair));
> +     bio_split_pool = bioset_create(BIO_POOL_SIZE, offsetof(struct bio_pair, 
> split));
>       if (!bio_split_pool)
>               panic("bio: can't create split pool\n");

BIO_SPLIT_ENTRIES is probably something dependent on how splits can
stack, so I don't think we want to change that to BIO_POOL_SIZE.

Thanks.

-- 
tejun
--
To unsubscribe from this list: send the line "unsubscribe linux-bcache" in
the body of a message to [email protected]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Reply via email to