[PATCH 03/12] md: add raid5_run_ops and support routines
From: Dan Williams <[EMAIL PROTECTED]>

Prepare the raid5 implementation to use async_tx for running stripe
operations:
* biofill (copy data into request buffers to satisfy a read request)
* compute block (generate a missing block in the cache from the other blocks)
* prexor (subtract existing data as part of the read-modify-write process)
* biodrain (copy data out of request buffers to satisfy a write request)
* postxor (recalculate parity for new data that has entered the cache)
* check (verify that the parity is correct)
* io (submit i/o to the member disks)

Changelog:
* removed ops_complete_biodrain in favor of ops_complete_postxor and
  ops_complete_write.
* removed the workqueue
* call bi_end_io for reads in ops_complete_biofill

Signed-off-by: Dan Williams <[EMAIL PROTECTED]>
---

 drivers/md/raid5.c         |  520 +++++++++++++++++++++++++++++++++++++++++++-
 include/linux/raid/raid5.h |   63 ++++++-
 2 files changed, 580 insertions(+), 3 deletions(-)
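The ordering between these operations is expressed through the async_tx
dependency-chaining API that this patch starts pulling into raid5: each
async_tx call returns a descriptor that a later call can name as its
dependency, so e.g. a biodrain copy is guaranteed to finish before the
postxor parity update runs, whether the work is offloaded to a DMA engine
or done by the synchronous software fallback. A minimal sketch of that
idiom follows; the function name and page arguments are illustrative, not
taken from the patch:

/*
 * Illustrative only -- not part of the patch.  Chain a buffer copy
 * (the biodrain step) into a parity recalculation (the postxor step)
 * by passing the descriptor returned by the first call as the
 * depend_tx argument of the second.
 */
#include <linux/async_tx.h>

static struct dma_async_tx_descriptor *
sketch_drain_then_postxor(struct page *parity, struct page **blocks,
			  int count, struct page *bio_page)
{
	struct dma_async_tx_descriptor *tx;

	/* biodrain: copy the incoming write data into a cache block */
	tx = async_memcpy(blocks[0], bio_page, 0, 0, STRIPE_SIZE,
			  0, NULL, NULL, NULL);

	/* postxor: recompute parity, but only after the copy completes */
	return async_xor(parity, blocks, 0, count, STRIPE_SIZE,
			 ASYNC_TX_XOR_ZERO_DST | ASYNC_TX_DEP_ACK,
			 tx, NULL, NULL);
}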
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 68b6fea..e70ee17 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -52,6 +52,7 @@
 #include "raid6.h"

 #include <linux/raid/bitmap.h>
+#include <linux/async_tx.h>

 /*
  * Stripe cache
@@ -324,6 +325,525 @@ static struct stripe_head *get_active_stripe(raid5_conf_t *conf, sector_t sector
 	return sh;
 }

+static int
+raid5_end_read_request(struct bio * bi, unsigned int bytes_done, int error);
+static int
+raid5_end_write_request (struct bio *bi, unsigned int bytes_done, int error);
+
+static void ops_run_io(struct stripe_head *sh)
+{
+	raid5_conf_t *conf = sh->raid_conf;
+	int i, disks = sh->disks;
+
+	might_sleep();
+
+	for (i=disks; i-- ;) {
+		int rw;
+		struct bio *bi;
+		mdk_rdev_t *rdev;
+		if (test_and_clear_bit(R5_Wantwrite, &sh->dev[i].flags))
+			rw = WRITE;
+		else if (test_and_clear_bit(R5_Wantread, &sh->dev[i].flags))
+			rw = READ;
+		else
+			continue;
+
+		bi = &sh->dev[i].req;
+
+		bi->bi_rw = rw;
+		if (rw == WRITE)
+			bi->bi_end_io = raid5_end_write_request;
+		else
+			bi->bi_end_io = raid5_end_read_request;
+
+		rcu_read_lock();
+		rdev = rcu_dereference(conf->disks[i].rdev);
+		if (rdev && test_bit(Faulty, &rdev->flags))
+			rdev = NULL;
+		if (rdev)
+			atomic_inc(&rdev->nr_pending);
+		rcu_read_unlock();
+
+		if (rdev) {
+			if (test_bit(STRIPE_SYNCING, &sh->state) ||
+			    test_bit(STRIPE_EXPAND_SOURCE, &sh->state) ||
+			    test_bit(STRIPE_EXPAND_READY, &sh->state))
+				md_sync_acct(rdev->bdev, STRIPE_SECTORS);
+
+			bi->bi_bdev = rdev->bdev;
+			PRINTK("%s: for %llu schedule op %ld on disc %d\n",
+				__FUNCTION__, (unsigned long long)sh->sector,
+				bi->bi_rw, i);
+			atomic_inc(&sh->count);
+			bi->bi_sector = sh->sector + rdev->data_offset;
+			bi->bi_flags = 1 << BIO_UPTODATE;
+			bi->bi_vcnt = 1;
+			bi->bi_max_vecs = 1;
+			bi->bi_idx = 0;
+			bi->bi_io_vec = &sh->dev[i].vec;
+			bi->bi_io_vec[0].bv_len = STRIPE_SIZE;
+			bi->bi_io_vec[0].bv_offset = 0;
+			bi->bi_size = STRIPE_SIZE;
+			bi->bi_next = NULL;
+			if (rw == WRITE &&
+			    test_bit(R5_ReWrite, &sh->dev[i].flags))
+				atomic_add(STRIPE_SECTORS, &rdev->corrected_errors);
+			generic_make_request(bi);
+		} else {
+			if (rw == WRITE)
+				set_bit(STRIPE_DEGRADED, &sh->state);
+			PRINTK("skip op %ld on disc %d for sector %llu\n",
+				bi->bi_rw, i, (unsigned long long)sh->sector);
+			clear_bit(R5_LOCKED, &sh->dev[i].flags);
+			set_bit(STRIPE_HANDLE, &sh->state);
+		}
+	}
+}
+
+static struct dma_async_tx_descriptor *
+async_copy_data(int frombio, struct bio *bio, struct page *page,
+	sector_t sector, struct dma_async_tx_descriptor *tx)
+{
+	struct bio_vec *bvl;
+	struct page *bio_page;
+	int i;
+	int page_offset;
+
+	if (bio->bi_sector >= sector)
+		page_offset = (signed)(bio->bi_sector - sector) * 512;
+	else
+		page_offset = (signed)(sector - bio->bi_sector) * -512;
+	bio_for_each_segment(bvl, bio, i) {
+		int len = bio_iovec_idx(bio,i)->bv_len;
+		int clen;
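The excerpt above cuts off midway through async_copy_data, before
raid5_run_ops itself appears. Going by the operation list in the patch
description, the dispatcher can be pictured roughly as below; the
STRIPE_OP_* bit names and the ops_run_* helpers other than ops_run_io are
assumptions for illustration, not quotes from the patch:

/*
 * Rough sketch of the raid5_run_ops dispatcher implied by the patch
 * description.  Helper names and STRIPE_OP_* bits are assumed, not
 * quoted; 'pending' is a bitmap of requested stripe operations and
 * 'tx' threads the async_tx dependency chain through the steps.
 */
static void raid5_run_ops(struct stripe_head *sh, unsigned long pending)
{
	struct dma_async_tx_descriptor *tx = NULL;

	if (test_bit(STRIPE_OP_BIOFILL, &pending))
		ops_run_biofill(sh);		/* reads: fill request buffers */

	if (test_bit(STRIPE_OP_COMPUTE_BLK, &pending))
		tx = ops_run_compute5(sh, pending); /* rebuild a missing block */

	if (test_bit(STRIPE_OP_PREXOR, &pending))
		tx = ops_run_prexor(sh, tx);	/* subtract old data (rmw) */

	if (test_bit(STRIPE_OP_BIODRAIN, &pending))
		tx = ops_run_biodrain(sh, tx, pending); /* copy in write data */

	if (test_bit(STRIPE_OP_POSTXOR, &pending))
		ops_run_postxor(sh, tx, pending); /* recalculate parity */

	if (test_bit(STRIPE_OP_CHECK, &pending))
		ops_run_check(sh);		/* verify parity */

	if (test_bit(STRIPE_OP_IO, &pending))
		ops_run_io(sh);			/* submit to member disks */
}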