Folks, see if the following patch helps. AFAICS it closes a pretty real
race: we could call block_write_full_page() for a page that has sync IO
in progress and blindly change the ->b_end_io callbacks on buffer heads
with pending requests. With a little bit of bad luck those requests would
complete before we got to ll_rw_block(), leading to an extra UnlockPage().
All it takes is a pageout on a page that got a partial write recently - if
some fragments are still unmapped we end up calling get_block() on them,
and that can easily block, providing a decent window for the race.

Fix: postpone changing ->b_end_io until just before the call to
ll_rw_block(); if some fragments still have IO in progress at that point,
wait on them first.

Comments?
                                                Cheers,
                                                        Al

--- buffer.c    Fri Dec  8 16:19:53 2000
+++ buffer.c.new        Fri Dec  8 16:26:44 2000
@@ -1577,6 +1577,26 @@
  * "Dirty" is valid only with the last case (mapped+uptodate).
  */
 
+static void write_array_async(struct page *page, struct buffer_head **p, int n)
+{
+       int i;
+       if (!n) {
+               UnlockPage(page);
+               return;
+       }
+       /*
+        * If there are pending requests on these guys - wait before changing
+        * ->b_end_io.
+        */
+       for (i=0; i<n; i++) {
+               wait_on_buffer(p[i]);
+               set_bit(BH_Uptodate, &p[i]->b_state);
+               set_bit(BH_Dirty, &p[i]->b_state);
+               p[i]->b_end_io = end_buffer_io_async;
+       }
+       ll_rw_block(WRITE, n, p);
+}
+
 /*
  * block_write_full_page() is SMP-safe - currently it's still
  * being called with the kernel lock held, but the code is ready.
@@ -1616,28 +1636,17 @@
                        if (buffer_new(bh))
                                unmap_underlying_metadata(bh);
                }
-               set_bit(BH_Uptodate, &bh->b_state);
-               set_bit(BH_Dirty, &bh->b_state);
-               bh->b_end_io = end_buffer_io_async;
                atomic_inc(&bh->b_count);
                arr[nr++] = bh;
                bh = bh->b_this_page;
                block++;
        } while (bh != head);
 
-       if (nr) {
-               ll_rw_block(WRITE, nr, arr);
-       } else {
-               UnlockPage(page);
-       }
+       write_array_async(page, arr, nr);
        SetPageUptodate(page);
        return 0;
 out:
-       if (nr) {
-               ll_rw_block(WRITE, nr, arr);
-       } else {
-               UnlockPage(page);
-       }
+       write_array_async(page, arr, nr);
        ClearPageUptodate(page);
        return err;
 }
