> From: Yuri Tikhonov [mailto:[EMAIL PROTECTED] > Hello, > > I tested the h/w accelerated RAID-5 using the kernel with PAGE_SIZE set to > 64KB and found the bonnie++ application hangs up during the "Re-writing" > test. I did some investigation and discovered that the hang-up occurs > because one of the mpage_end_io_read() calls is missing (these are the > callbacks initiated from the ops_complete_biofill() function). > > The fact is that my low-level ADMA driver (the ppc440spe one) successfully > initiated the ops_complete_biofill() callback but the ops_complete_biofill() > function itself skipped calling the bi_end_io() handler of the completed bio > (current dev->read) because during processing of this (current dev->read) bio > some other request had come to the sh (current dev_q->toread). Thus > ops_complete_biofill() scheduled another biofill operation which, as a > result, overwrote the unacknowledged bio (dev->read in ops_run_biofill()), > and so we lost the previous dev->read bio completely. > > Here is a patch that solves this problem. Perhaps this might be implemented > in a more elegant and effective way. What are your thoughts regarding > this? > > Regards, Yuri > > diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c > index 08b4893..7abc96b 100644 > --- a/drivers/md/raid5.c > +++ b/drivers/md/raid5.c > @@ -838,11 +838,24 @@ static void ops_complete_biofill(void *stripe_head_ref) > /* acknowledge completion of a biofill operation */ > /* and check if we need to reply to a read request > */ > - if (test_bit(R5_Wantfill, &dev_q->flags) && !dev_q->toread) { > + if (test_bit(R5_Wantfill, &dev_q->flags)) { > struct bio *rbi, *rbi2; > struct r5dev *dev = &sh->dev[i]; > > - clear_bit(R5_Wantfill, &dev_q->flags); > + /* There is a chance that another fill operation > + * had been scheduled for this dev while we > + * processed sh. 
In this case do one of the following > + * alternatives: > + * - if there is no active completed biofill for the dev > + * then go to the next dev leaving Wantfill set; > + * - if there is active completed biofill for the dev > + * then ack it but leave Wantfill set. > + */ > + if (dev_q->toread && !dev->read) > + continue; > + > + if (!dev_q->toread) > + clear_bit(R5_Wantfill, &dev_q->flags); > > /* The access to dev->read is outside of the > * spin_lock_irq(&conf->device_lock), but is protected
This still looks racy... I think the complete fix is to make the
R5_Wantfill and dev_q->toread accesses atomic. Please test the
following patch (also attached) and let me know if it fixes what you are
seeing:
Applies on top of git://lost.foo-projects.org/~dwillia2/git/iop
md-for-linus
---
raid5: fix ops_complete_biofill race in the asynchronous offload case
Protect against dev_q->toread toggling while the R5_Wantfill bit is
tested and cleared: take sq->lock around the completion scan in
ops_complete_biofill(), and convert the other sq->lock sites to
spin_lock_bh() so that asynchronous completions (tasklets) cannot
run while handle_stripe holds the lock.
---
drivers/md/raid5.c | 16 +++++++++-------
1 files changed, 9 insertions(+), 7 deletions(-)
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 2f9022d..91c14c6 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -824,6 +824,7 @@ static void ops_complete_biofill(void
*stripe_head_ref)
(unsigned long long)sh->sector);
/* clear completed biofills */
+ spin_lock(&sq->lock);
for (i = sh->disks; i--; ) {
struct r5dev *dev = &sh->dev[i];
struct r5_queue_dev *dev_q = &sq->dev[i];
@@ -861,6 +862,7 @@ static void ops_complete_biofill(void
*stripe_head_ref)
}
clear_bit(STRIPE_OP_BIOFILL, &sh->ops.ack);
clear_bit(STRIPE_OP_BIOFILL, &sh->ops.pending);
+ spin_unlock(&sq->lock);
return_io(return_bi);
@@ -2279,7 +2281,7 @@ static int add_queue_bio(struct stripe_queue *sq,
struct bio *bi, int dd_idx,
(unsigned long long)bi->bi_sector,
(unsigned long long)sq->sector);
- spin_lock(&sq->lock);
+ spin_lock_bh(&sq->lock);
spin_lock_irq(&conf->device_lock);
sh = sq->sh;
if (forwrite) {
@@ -2306,7 +2308,7 @@ static int add_queue_bio(struct stripe_queue *sq,
struct bio *bi, int dd_idx,
*bip = bi;
bi->bi_phys_segments ++;
spin_unlock_irq(&conf->device_lock);
- spin_unlock(&sq->lock);
+ spin_unlock_bh(&sq->lock);
pr_debug("added bi b#%llu to stripe s#%llu, disk %d.\n",
(unsigned long long)bi->bi_sector,
@@ -2339,7 +2341,7 @@ static int add_queue_bio(struct stripe_queue *sq,
struct bio *bi, int dd_idx,
overlap:
set_bit(R5_Overlap, &sh->dev[dd_idx].flags);
spin_unlock_irq(&conf->device_lock);
- spin_unlock(&sq->lock);
+ spin_unlock_bh(&sq->lock);
return 0;
}
@@ -3127,7 +3129,7 @@ static void handle_stripe5(struct stripe_head *sh)
atomic_read(&sh->count), sq->pd_idx,
sh->ops.pending, sh->ops.ack, sh->ops.complete);
- spin_lock(&sq->lock);
+ spin_lock_bh(&sq->lock);
clear_bit(STRIPE_HANDLE, &sh->state);
s.syncing = test_bit(STRIPE_SYNCING, &sh->state);
@@ -3370,7 +3372,7 @@ static void handle_stripe5(struct stripe_head *sh)
if (sh->ops.count)
pending = get_stripe_work(sh);
- spin_unlock(&sq->lock);
+ spin_unlock_bh(&sq->lock);
if (pending)
raid5_run_ops(sh, pending);
@@ -3397,7 +3399,7 @@ static void handle_stripe6(struct stripe_head *sh,
struct page *tmp_page)
atomic_read(&sh->count), pd_idx, r6s.qd_idx);
memset(&s, 0, sizeof(s));
- spin_lock(&sq->lock);
+ spin_lock_bh(&sq->lock);
clear_bit(STRIPE_HANDLE, &sh->state);
s.syncing = test_bit(STRIPE_SYNCING, &sh->state);
@@ -3576,7 +3578,7 @@ static void handle_stripe6(struct stripe_head *sh,
struct page *tmp_page)
if (s.expanding && s.locked == 0)
handle_stripe_expansion(conf, sh, &r6s);
- spin_unlock(&sq->lock);
+ spin_unlock_bh(&sq->lock);
return_io(return_bi);
md-accel-fix-ops-complete-biofill-race.patch
Description: md-accel-fix-ops-complete-biofill-race.patch
