[PATCH 03/12] md: add raid5_run_ops and support routines

Date: 2007-01-22
From: Dan Williams <[EMAIL PROTECTED]>

Prepare the raid5 implementation to use async_tx for running stripe
operations:
* biofill (copy data into request buffers to satisfy a read request)
* compute block (generate a missing block in the cache from the other
blocks)
* prexor (subtract existing data as part of the read-modify-write process)
* biodrain (copy data out of request buffers to satisfy a write request)
* postxor (recalculate parity for new data that has entered the cache)
* check (verify that the parity is correct)
* io (submit i/o to the member disks)
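
As an illustration (not code from this patch): a read-modify-write pass
could chain prexor -> biodrain -> postxor through the async_tx API
introduced earlier in this series, each stage ordered after the previous
one via the depend_tx argument.  The helper names and flag choices in
this sketch are assumptions:

	/* hypothetical completion callback for the end of the chain */
	static void example_complete_write(void *stripe_head_ref)
	{
		/* mark parity up to date, schedule further stripe handling */
	}

	static void example_rmw_chain(struct page *parity,
		struct page **old_data, struct page **new_data,
		struct page *bio_page, int count, struct stripe_head *sh)
	{
		struct dma_async_tx_descriptor *tx;

		/* prexor: xor the old data out of the parity block */
		tx = async_xor(parity, old_data, 0, count, STRIPE_SIZE,
			ASYNC_TX_XOR_DROP_DST, NULL, NULL, NULL);

		/* biodrain: copy new data from the bio into the stripe
		 * cache, ordered after the prexor via depend_tx */
		tx = async_memcpy(new_data[0], bio_page, 0, 0, STRIPE_SIZE,
			ASYNC_TX_DEP_ACK, tx, NULL, NULL);

		/* postxor: fold the new data back into the parity block
		 * and fire the callback once the whole chain is done */
		async_xor(parity, new_data, 0, count, STRIPE_SIZE,
			ASYNC_TX_XOR_DROP_DST | ASYNC_TX_DEP_ACK, tx,
			example_complete_write, sh);
	}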

Changelog:
* removed ops_complete_biodrain in favor of ops_complete_postxor and
ops_complete_write.
* removed the workqueue
* call bi_end_io for reads in ops_complete_biofill
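
The last item amounts to something like the following sketch; the
bio-collection helper is hypothetical, and the three-argument bi_end_io
call matches the current bio completion convention:

	static void example_complete_biofill(void *stripe_head_ref)
	{
		struct stripe_head *sh = stripe_head_ref;
		/* example_collect_read_bios(): assumed helper that unlinks
		 * the completed read bios from the stripe */
		struct bio *return_bi = example_collect_read_bios(sh);

		while (return_bi) {
			struct bio *bi = return_bi;
			int bytes = bi->bi_size;

			return_bi = bi->bi_next;
			bi->bi_next = NULL;
			bi->bi_size = 0;
			bi->bi_end_io(bi, bytes,
				test_bit(BIO_UPTODATE, &bi->bi_flags) ?
					0 : -EIO);
		}
		release_stripe(sh);
	}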

Signed-off-by: Dan Williams <[EMAIL PROTECTED]>
---

 drivers/md/raid5.c |  520 
 include/linux/raid/raid5.h |   63 +
 2 files changed, 580 insertions(+), 3 deletions(-)

diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 68b6fea..e70ee17 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -52,6 +52,7 @@
 #include "raid6.h"
 
 #include <linux/raid/bitmap.h>
+#include <linux/async_tx.h>
 
 /*
  * Stripe cache
@@ -324,6 +325,525 @@ static struct stripe_head *get_active_stripe(raid5_conf_t *conf, sector_t sector
return sh;
 }
 
+static int
+raid5_end_read_request(struct bio * bi, unsigned int bytes_done, int error);
+static int
+raid5_end_write_request (struct bio *bi, unsigned int bytes_done, int error);
+
+static void ops_run_io(struct stripe_head *sh)
+{
+   raid5_conf_t *conf = sh->raid_conf;
+   int i, disks = sh->disks;
+
+   might_sleep();
+
+   for (i=disks; i-- ;) {
+   int rw;
+   struct bio *bi;
+   mdk_rdev_t *rdev;
+   if (test_and_clear_bit(R5_Wantwrite, &sh->dev[i].flags))
+   rw = WRITE;
+   else if (test_and_clear_bit(R5_Wantread, &sh->dev[i].flags))
+   rw = READ;
+   else
+   continue;
+
+   bi = &sh->dev[i].req;
+
+   bi->bi_rw = rw;
+   if (rw == WRITE)
+   bi->bi_end_io = raid5_end_write_request;
+   else
+   bi->bi_end_io = raid5_end_read_request;
+
+   rcu_read_lock();
+   rdev = rcu_dereference(conf->disks[i].rdev);
+   if (rdev && test_bit(Faulty, &rdev->flags))
+   rdev = NULL;
+   if (rdev)
+   atomic_inc(&rdev->nr_pending);
+   rcu_read_unlock();
+
+   if (rdev) {
+   if (test_bit(STRIPE_SYNCING, &sh->state) ||
+   test_bit(STRIPE_EXPAND_SOURCE, &sh->state) ||
+   test_bit(STRIPE_EXPAND_READY, &sh->state))
+   md_sync_acct(rdev->bdev, STRIPE_SECTORS);
+
+   bi->bi_bdev = rdev->bdev;
+   PRINTK("%s: for %llu schedule op %ld on disc %d\n",
+   __FUNCTION__, (unsigned long long)sh->sector,
+   bi->bi_rw, i);
+   atomic_inc(&sh->count);
+   bi->bi_sector = sh->sector + rdev->data_offset;
+   bi->bi_flags = 1 << BIO_UPTODATE;
+   bi->bi_vcnt = 1;
+   bi->bi_max_vecs = 1;
+   bi->bi_idx = 0;
+   bi->bi_io_vec = &sh->dev[i].vec;
+   bi->bi_io_vec[0].bv_len = STRIPE_SIZE;
+   bi->bi_io_vec[0].bv_offset = 0;
+   bi->bi_size = STRIPE_SIZE;
+   bi->bi_next = NULL;
+   if (rw == WRITE &&
+   test_bit(R5_ReWrite, &sh->dev[i].flags))
+   atomic_add(STRIPE_SECTORS, &rdev->corrected_errors);
+   generic_make_request(bi);
+   } else {
+   if (rw == WRITE)
+   set_bit(STRIPE_DEGRADED, &sh->state);
+   PRINTK("skip op %ld on disc %d for sector %llu\n",
+   bi->bi_rw, i, (unsigned long long)sh->sector);
+   clear_bit(R5_LOCKED, &sh->dev[i].flags);
+   set_bit(STRIPE_HANDLE, &sh->state);
+   }
+   }
+}
+
+static struct dma_async_tx_descriptor *
+async_copy_data(int frombio, struct bio *bio, struct page *page,
+   sector_t sector, struct dma_async_tx_descriptor *tx)
+{
+   struct bio_vec *bvl;
+   struct page *bio_page;
+   int i;
+   int page_offset;
+
+   if (bio->bi_sector >= sector)
+   page_offset = (signed)(bio->bi_sector - sector) * 512;
+   else
+   page_offset = (signed)(sector - bio->bi_sector) * -512;
+   bio_for_each_segment(bvl, bio, i) {
+   int len = bio_iovec_idx(bio,i)->bv_len;
+   int clen;
+ 
