On Wed, Dec 06 2006, Jens Axboe wrote:
> I still can't help but think we can do better than this, and that this
> is nothing more than optimizing for a benchmark. For high performance
> I/O, you will be doing > 1 page bio's anyway and this patch wont help
> you at all. Perhaps we can just kill bio_vec slabs completely, and
> create bio slabs instead with differing sizes. So instead of having 1
> bio slab and 5 bio_vec slabs, change that to 5 bio slabs that leave room
> for the bio_vec list at the end. That would always eliminate the extra
> allocation, at the cost of blowing the 256-page case into a order 1 page
> allocation (256*16 + sizeof(*bio) > PAGE_SIZE) for the 4kb 64-bit archs,
> which is something I've always tried to avoid.

That would look something like this. You'd probably want to re-tweak the
slab sizes now, so that we get the most out of the slab page. If we
accept the 2^1 order allocation for the largest bio, we can make it
larger than 256 pages at no extra cost. Probably around 500 pages would
still fit inside the two pages.

diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c
index 08a40f4..b4bf3b3 100644
--- a/drivers/md/dm-crypt.c
+++ b/drivers/md/dm-crypt.c
@@ -813,7 +813,7 @@ static int crypt_ctr(struct dm_target *t
                goto bad4;
        }
 
-       cc->bs = bioset_create(MIN_IOS, MIN_IOS, 4);
+       cc->bs = bioset_create(MIN_IOS, 4);
        if (!cc->bs) {
                ti->error = "Cannot allocate crypt bioset";
                goto bad_bs;
diff --git a/drivers/md/dm-io.c b/drivers/md/dm-io.c
index da663d2..581c24a 100644
--- a/drivers/md/dm-io.c
+++ b/drivers/md/dm-io.c
@@ -60,7 +60,7 @@ static int resize_pool(unsigned int new_
                if (!_io_pool)
                        return -ENOMEM;
 
-               _bios = bioset_create(16, 16, 4);
+               _bios = bioset_create(16, 4);
                if (!_bios) {
                        mempool_destroy(_io_pool);
                        _io_pool = NULL;
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index fc4f743..98f6768 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -973,7 +973,7 @@ static struct mapped_device *alloc_dev(i
        if (!md->tio_pool)
                goto bad3;
 
-       md->bs = bioset_create(16, 16, 4);
+       md->bs = bioset_create(16, 4);
        if (!md->bs)
                goto bad_no_bioset;
 
diff --git a/fs/bio.c b/fs/bio.c
index aa4d09b..452e8f7 100644
--- a/fs/bio.c
+++ b/fs/bio.c
@@ -30,10 +30,6 @@
 
 #define BIO_POOL_SIZE 256
 
-static kmem_cache_t *bio_slab __read_mostly;
-
-#define BIOVEC_NR_POOLS 6
-
 /*
  * a small number of entries is fine, not going to be performance critical.
  * basically we just need to survive
@@ -41,23 +37,23 @@ static kmem_cache_t *bio_slab __read_mos
 #define BIO_SPLIT_ENTRIES 8    
 mempool_t *bio_split_pool __read_mostly;
 
-struct biovec_slab {
+struct bio_slab {
        int nr_vecs;
-       char *name; 
+       const char *name;
        kmem_cache_t *slab;
 };
 
-/*
- * if you change this list, also change bvec_alloc or things will
- * break badly! cannot be bigger than what you can fit into an
- * unsigned short
- */
-
-#define BV(x) { .nr_vecs = x, .name = "biovec-"__stringify(x) }
-static struct biovec_slab bvec_slabs[BIOVEC_NR_POOLS] __read_mostly = {
-       BV(1), BV(4), BV(16), BV(64), BV(128), BV(BIO_MAX_PAGES),
+#define BIOSLAB_NR_POOLS 6
+#define BIOSLAB(x) { .nr_vecs = x, .name = "bio-"__stringify(x) }
+static struct bio_slab bio_slabs[BIOSLAB_NR_POOLS] __read_mostly = {
+       BIOSLAB(0),
+       BIOSLAB(4),
+       BIOSLAB(16),
+       BIOSLAB(64),
+       BIOSLAB(128),
+       BIOSLAB(BIO_MAX_PAGES),
 };
-#undef BV
+#undef BIOSLAB
 
 /*
  * bio_set is used to allow other portions of the IO system to
@@ -66,8 +62,7 @@ static struct biovec_slab bvec_slabs[BIO
  * and the bvec_slabs[].
  */
 struct bio_set {
-       mempool_t *bio_pool;
-       mempool_t *bvec_pools[BIOVEC_NR_POOLS];
+       mempool_t *bio_pools[BIOSLAB_NR_POOLS];
 };
 
 /*
@@ -76,45 +71,12 @@ struct bio_set {
  */
 static struct bio_set *fs_bio_set;
 
-static inline struct bio_vec *bvec_alloc_bs(gfp_t gfp_mask, int nr, unsigned 
long *idx, struct bio_set *bs)
-{
-       struct bio_vec *bvl;
-
-       /*
-        * see comment near bvec_array define!
-        */
-       switch (nr) {
-               case   1        : *idx = 0; break;
-               case   2 ...   4: *idx = 1; break;
-               case   5 ...  16: *idx = 2; break;
-               case  17 ...  64: *idx = 3; break;
-               case  65 ... 128: *idx = 4; break;
-               case 129 ... BIO_MAX_PAGES: *idx = 5; break;
-               default:
-                       return NULL;
-       }
-       /*
-        * idx now points to the pool we want to allocate from
-        */
-
-       bvl = mempool_alloc(bs->bvec_pools[*idx], gfp_mask);
-       if (bvl) {
-               struct biovec_slab *bp = bvec_slabs + *idx;
-
-               memset(bvl, 0, bp->nr_vecs * sizeof(struct bio_vec));
-       }
-
-       return bvl;
-}
-
 void bio_free(struct bio *bio, struct bio_set *bio_set)
 {
        const int pool_idx = BIO_POOL_IDX(bio);
 
-       BIO_BUG_ON(pool_idx >= BIOVEC_NR_POOLS);
-
-       mempool_free(bio->bi_io_vec, bio_set->bvec_pools[pool_idx]);
-       mempool_free(bio, bio_set->bio_pool);
+       BIO_BUG_ON(pool_idx >= BIOSLAB_NR_POOLS);
+       mempool_free(bio, bio_set->bio_pools[pool_idx]);
 }
 
 /*
@@ -160,26 +122,51 @@ void bio_init(struct bio *bio)
  **/
 struct bio *bio_alloc_bioset(gfp_t gfp_mask, int nr_iovecs, struct bio_set *bs)
 {
-       struct bio *bio = mempool_alloc(bs->bio_pool, gfp_mask);
+       struct bio *bio = NULL;
+       long idx;
+
+       switch (nr_iovecs) {
+       case   0:
+               idx = 0;
+               break;
+       case   1 ...   4:
+               idx = 1;
+               break;
+       case   5 ...  16:
+               idx = 2;
+               break;
+       case  17 ...  64:
+               idx = 3;
+               break;
+       case  65 ... 128:
+               idx = 4;
+               break;
+       case 129 ... BIO_MAX_PAGES:
+               idx = 5;
+               break;
+       default:
+               goto out;
+       }
+
+       bio = mempool_alloc(bs->bio_pools[idx], gfp_mask);
 
        if (likely(bio)) {
                struct bio_vec *bvl = NULL;
 
                bio_init(bio);
-               if (likely(nr_iovecs)) {
-                       unsigned long idx = 0; /* shut up gcc */
-
-                       bvl = bvec_alloc_bs(gfp_mask, nr_iovecs, &idx, bs);
-                       if (unlikely(!bvl)) {
-                               mempool_free(bio, bs->bio_pool);
-                               bio = NULL;
-                               goto out;
-                       }
+
+               if (nr_iovecs) {
+                       struct bio_slab *bslab = &bio_slabs[idx];
+
+                       bvl = (void *) bio + sizeof(*bio);
+                       memset(bvl, 0, bslab->nr_vecs * sizeof(struct bio_vec));
+
                        bio->bi_flags |= idx << BIO_POOL_OFFSET;
-                       bio->bi_max_vecs = bvec_slabs[idx].nr_vecs;
+                       bio->bi_max_vecs = bslab->nr_vecs;
                }
                bio->bi_io_vec = bvl;
        }
+
 out:
        return bio;
 }
@@ -1120,89 +1107,78 @@ struct bio_pair *bio_split(struct bio *b
  * create memory pools for biovec's in a bio_set.
  * use the global biovec slabs created for general use.
  */
-static int biovec_create_pools(struct bio_set *bs, int pool_entries, int scale)
+static int bio_create_pools(struct bio_set *bs, int pool_entries, int scale)
 {
        int i;
 
-       for (i = 0; i < BIOVEC_NR_POOLS; i++) {
-               struct biovec_slab *bp = bvec_slabs + i;
-               mempool_t **bvp = bs->bvec_pools + i;
+       for (i = 0; i < BIOSLAB_NR_POOLS; i++) {
+               struct bio_slab *bslab = bio_slabs + i;
+               mempool_t **bvp = bs->bio_pools + i;
 
                if (pool_entries > 1 && i >= scale)
                        pool_entries >>= 1;
 
-               *bvp = mempool_create_slab_pool(pool_entries, bp->slab);
+               *bvp = mempool_create_slab_pool(pool_entries, bslab->slab);
                if (!*bvp)
                        return -ENOMEM;
        }
        return 0;
 }
 
-static void biovec_free_pools(struct bio_set *bs)
+static void bio_free_pools(struct bio_set *bs)
 {
        int i;
 
-       for (i = 0; i < BIOVEC_NR_POOLS; i++) {
-               mempool_t *bvp = bs->bvec_pools[i];
+       for (i = 0; i < BIOSLAB_NR_POOLS; i++) {
+               mempool_t *bp = bs->bio_pools[i];
 
-               if (bvp)
-                       mempool_destroy(bvp);
+               if (bp)
+                       mempool_destroy(bp);
        }
 
 }
 
 void bioset_free(struct bio_set *bs)
 {
-       if (bs->bio_pool)
-               mempool_destroy(bs->bio_pool);
-
-       biovec_free_pools(bs);
+       bio_free_pools(bs);
 
        kfree(bs);
 }
 
-struct bio_set *bioset_create(int bio_pool_size, int bvec_pool_size, int scale)
+struct bio_set *bioset_create(int bio_pool_size, int scale)
 {
        struct bio_set *bs = kzalloc(sizeof(*bs), GFP_KERNEL);
 
        if (!bs)
                return NULL;
 
-       bs->bio_pool = mempool_create_slab_pool(bio_pool_size, bio_slab);
-       if (!bs->bio_pool)
-               goto bad;
-
-       if (!biovec_create_pools(bs, bvec_pool_size, scale))
+       if (!bio_create_pools(bs, bio_pool_size, scale))
                return bs;
 
-bad:
        bioset_free(bs);
        return NULL;
 }
 
-static void __init biovec_init_slabs(void)
+static void __init bio_init_slabs(void)
 {
        int i;
 
-       for (i = 0; i < BIOVEC_NR_POOLS; i++) {
+       for (i = 0; i < BIOSLAB_NR_POOLS; i++) {
                int size;
-               struct biovec_slab *bvs = bvec_slabs + i;
+               struct bio_slab *bs = bio_slabs + i;
 
-               size = bvs->nr_vecs * sizeof(struct bio_vec);
-               bvs->slab = kmem_cache_create(bvs->name, size, 0,
-                                SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL);
+               size = bs->nr_vecs * sizeof(struct bio_vec) + sizeof(struct 
bio);
+               bs->slab = kmem_cache_create(bs->name, size, 0, SLAB_PANIC,
+                                               NULL, NULL);
        }
 }
 
 static int __init init_bio(void)
 {
-       int megabytes, bvec_pool_entries;
-       int scale = BIOVEC_NR_POOLS;
-
-       bio_slab = kmem_cache_create("bio", sizeof(struct bio), 0,
-                               SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL);
+       int scale = BIOSLAB_NR_POOLS;
+       int megabytes;
 
-       biovec_init_slabs();
+       bio_init_slabs();
 
        megabytes = nr_free_pages() >> (20 - PAGE_SHIFT);
 
@@ -1225,9 +1201,8 @@ static int __init init_bio(void)
         * the system is completely unable to allocate memory, so we only
         * need enough to make progress.
         */
-       bvec_pool_entries = 1 + scale;
 
-       fs_bio_set = bioset_create(BIO_POOL_SIZE, bvec_pool_entries, scale);
+       fs_bio_set = bioset_create(BIO_POOL_SIZE, scale);
        if (!fs_bio_set)
                panic("bio: can't allocate bios\n");
 
diff --git a/include/linux/bio.h b/include/linux/bio.h
index 092dbd0..d6d0047 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -276,7 +276,7 @@ extern struct bio_pair *bio_split(struct
 extern mempool_t *bio_split_pool;
 extern void bio_pair_release(struct bio_pair *dbio);
 
-extern struct bio_set *bioset_create(int, int, int);
+extern struct bio_set *bioset_create(int, int);
 extern void bioset_free(struct bio_set *);
 
 extern struct bio *bio_alloc(gfp_t, int);

-- 
Jens Axboe

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Reply via email to