The only reason for the initial read_mapping_page in fork_buffer was to
be sure there are no reads in progress on other buffers on the same
page, so that data will not be read onto the old page after it is
removed from the mapping.  A better way to do that is to acquire the
buffer lock for each buffer during the buffer walk.  Then we know that
any read that was in progress has finished, and no new read can start.
Now, if the buffer is uptodate after the buffer lock is acquired, it is
copied to the new page; otherwise we make a modest saving by skipping
the copy.

The big cleanup is that, since the full page no longer needs to be
uptodate, there is no reading of extra blocks just to fill out the
page.  For a fork of a buffer on the volume map, those extra blocks
might have been free or mapped to files.  Now there is no IO at all in
fork_buffer.

Inode and delta lists still need spinlocks, roughly as sketched below;
other than that, lock coverage looks close to complete.
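
Something like the following would cover the list moves in the dirty
branch of fork_buffer below (just a sketch: dirty_lock and
delta_list_lock are made-up names, and the real lock placement depends
on who else walks these lists):

        if (buffer_dirty(oldbuf)) {
                unsigned olddelta = bufdelta(oldbuf);

                // Hypothetical per-inode lock for the inode dirty list
                spin_lock(&tux_inode(inode)->dirty_lock);
                list_move_tail(&oldbuf->b_assoc_buffers, inode_dirty_list);
                set_bufdelta(oldbuf, newdelta);
                spin_unlock(&tux_inode(inode)->dirty_lock);

                // Hypothetical global lock for the per-delta lists
                spin_lock(&delta_list_lock);
                list_move_tail(&newbuf->b_assoc_buffers, delta_list + olddelta);
                set_bufdelta(newbuf, olddelta);
                spin_unlock(&delta_list_lock);
        }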

int fork_buffer(struct buffer_head *buffer)
{
        struct page *oldpage = buffer->b_page;
        struct address_space *mapping = oldpage->mapping;
        struct inode *inode = mapping->host;
        struct list_head *inode_dirty_list = &tux_inode(inode)->dirty;
        unsigned newdelta = tux_sb(inode->i_sb)->delta & DELTA_MASK;
        unsigned blocksize = inode->i_sb->s_blocksize;

        // Take page lock to protect buffer list
        lock_page(oldpage);

        // The fork happened while waiting for the lock?
        if (bufdelta(buffer) == newdelta) {
                unlock_page(oldpage);
                return 0;
        }

        // Allocate a new page and put buffers on it
        struct page *newpage = alloc_pages(GFP_KERNEL, 0);
        if (!newpage) {
                unlock_page(oldpage);
                return -ENOMEM;
        }
        newpage->mapping = oldpage->mapping;
        newpage->index = oldpage->index;
        create_empty_buffers(newpage, blocksize, 0);

        // Walk the two buffer lists together
        struct buffer_head *oldbuf = (void *)oldpage->private, *oldlist = oldbuf;
        struct buffer_head *newbuf = (void *)newpage->private;
        do {
                void *olddata = oldbuf->b_data;
                void *newdata = newbuf->b_data;

                // Ensure any read is finished
                lock_buffer(oldbuf);
                if (buffer_uptodate(oldbuf))
                        memcpy(newdata, olddata, blocksize);

                newbuf->b_state = oldbuf->b_state &
                        ((1 << BH_Uptodate) | (1 << BH_Dirty));
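                // Exchange page attachment and data: the old heads follow
                // the new page into the current delta, the new heads keep
                // the old data for writeout of the earlier delta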
                oldbuf->b_page = newpage;
                newbuf->b_page = oldpage;
                oldbuf->b_data = newdata;
                newbuf->b_data = olddata;

                if (buffer_dirty(oldbuf)) {
                        unsigned olddelta = bufdelta(oldbuf);
                        assert(olddelta != newdelta);

                        // Set old buffer dirty in current delta
                        list_move_tail(&oldbuf->b_assoc_buffers, inode_dirty_list);
                        set_bufdelta(oldbuf, newdelta);

                        // Add new buffer to earlier delta list
                        list_move_tail(&newbuf->b_assoc_buffers, delta_list + olddelta);
                        set_bufdelta(newbuf, olddelta);
                }
                unlock_buffer(oldbuf);
                oldbuf = oldbuf->b_this_page;
                newbuf = newbuf->b_this_page;
        } while (oldbuf != oldlist);

        // Swap the page buffer lists
        oldpage->private = newpage->private;
        newpage->private = (unsigned long)oldlist;

        // Replace page in radix tree
        spin_lock_irq(&mapping->tree_lock);
        void **slot = radix_tree_lookup_slot(&mapping->page_tree, oldpage->index);
        radix_tree_replace_slot(slot, newpage);
        spin_unlock_irq(&mapping->tree_lock);
        get_page(newpage);
        put_page(oldpage);
        unlock_page(oldpage);
        return 0;
}
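
For context, the caller side is the dirty path: when a buffer is about
to be modified but is still dirty in an earlier delta, it gets forked
first.  Roughly (tux3_dirty_buffer is just a placeholder name for that
path, not the real interface):

void tux3_dirty_buffer(struct buffer_head *buffer, struct inode *inode)
{
        unsigned newdelta = tux_sb(inode->i_sb)->delta & DELTA_MASK;

        // Still dirty in an earlier delta: fork so that delta's data
        // stays stable for writeout
        if (buffer_dirty(buffer) && bufdelta(buffer) != newdelta)
                fork_buffer(buffer);

        set_buffer_dirty(buffer);
        set_bufdelta(buffer, newdelta);
        list_move_tail(&buffer->b_assoc_buffers, &tux_inode(inode)->dirty);
}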
