Folks, please help to test it. WARNING: IT'S ON THE CRITICAL PATH
AND IF IT'S BROKEN IT WILL EAT YOUR DATA. SILENTLY. EXTREME DANGER. USE
ONLY ON SCRATCH BOXEN.
        Patch merges most of block_write_{partial,cont}_page and fixes a
couple of bad bugs in the latter. It should fix the problems with
write-beyond-EOF on FAT and HPFS and it cleans the code up. The place it
touches is extermely dangerous, due to the fact that all writes on local
filesystems go there. I _think_ that code is correct now and local tests
seem to be OK, but I don't want to submit it before I'll have independent
results of testing. Please, try it on a boxen where you can afford
reinstall and watch for data corruption in files. Comments on new code are
also welcome, indeed.
                                                        Cheers,
                                                                Al
--- linux-2.3.30-pre3/fs/buffer.c       Sun Nov 28 01:35:44 1999
+++ linux-bird.symlink/fs/buffer.c      Tue Nov 30 22:40:45 1999
@@ -1378,24 +1378,17 @@
        return err;
 }
 
-int block_write_partial_page(struct file *file, struct page *page, unsigned long 
offset, unsigned long bytes, const char * buf)
+int block_write_zero_range(struct dentry *dentry, struct page *page,
+               unsigned zerofrom, unsigned from, unsigned to,
+               const char * buf)
 {
-       struct dentry *dentry = file->f_dentry;
        struct inode *inode = dentry->d_inode;
+       unsigned zeroto = 0, block_start, block_end;
        unsigned long block;
-       int err, partial;
-       unsigned long blocksize, start_block, end_block;
-       unsigned long start_offset, start_bytes, end_bytes;
-       unsigned long bbits, blocks, i, len;
-       struct buffer_head *bh, *head;
-       char *target_buf, *kaddr;
-       int need_balance_dirty;
-
-       kaddr = (char *)kmap(page);
-       target_buf = kaddr + offset;
-
-       if (!PageLocked(page))
-               BUG();
+       int err = 0, partial = 0, need_balance_dirty = 0;
+       unsigned blocksize, bbits;
+       struct buffer_head *bh, *head, *wait[2], **wait_bh=wait;
+       char *kaddr = (char *)kmap(page);
 
        blocksize = inode->i_sb->s_blocksize;
        if (!page->buffers)
@@ -1404,49 +1397,21 @@
 
        bbits = inode->i_sb->s_blocksize_bits;
        block = page->index << (PAGE_CACHE_SHIFT - bbits);
-       blocks = PAGE_CACHE_SIZE >> bbits;
-       start_block = offset >> bbits;
-       end_block = (offset + bytes - 1) >> bbits;
-       start_offset = offset & (blocksize - 1);
-       start_bytes = blocksize - start_offset;
-       if (start_bytes > bytes)
-               start_bytes = bytes;
-       end_bytes = (offset+bytes) & (blocksize - 1);
-       if (end_bytes > bytes)
-               end_bytes = bytes;
-
-       if (offset < 0 || offset >= PAGE_SIZE)
-               BUG();
-       if (bytes+offset < 0 || bytes+offset > PAGE_SIZE)
-               BUG();
-       if (start_block < 0 || start_block >= blocks)
-               BUG();
-       if (end_block < 0 || end_block >= blocks)
-               BUG();
 
-       i = 0;
-       bh = head;
-       partial = 0;
-       need_balance_dirty = 0;
-       do {
+       /*
+        * First pass - map what needs to be mapped, initiate reads
+        * on the boundaries if needed (i.e. if block is partially covered
+        * _and_ is not up-to-date _and_ is not new).
+        */
+       for(bh = head, block_start = 0; bh != head || !block_start;
+           block++, block_start=block_end, bh = bh->b_this_page) {
                if (!bh)
                        BUG();
-
-               if ((i < start_block) || (i > end_block)) {
-                       if (!buffer_uptodate(bh))
-                               partial = 1;
-                       goto skip;
-               }
-
-               /*
-                * If the buffer is not up-to-date, we need to ask the low-level
-                * FS to do something for us (we used to have assumptions about
-                * the meaning of b_blocknr etc, that's bad).
-                *
-                * If "update" is set, that means that the low-level FS should
-                * try to make sure that the block is up-to-date because we're
-                * not going to fill it completely.
-                */
+               block_end = block_start+blocksize;
+               if (block_end <= zerofrom)
+                       continue;
+               if (block_start >= to)
+                       break;
                bh->b_end_io = end_buffer_io_sync;
                if (!buffer_mapped(bh)) {
                        err = inode->i_op->get_block(inode, block, bh, 1);
@@ -1454,71 +1419,73 @@
                                goto out;
                        unmap_underlying_metadata(bh);
                }
-
-               if (!buffer_uptodate(bh) && (start_offset || (end_bytes && (i == 
end_block)))) {
-                       if (buffer_new(bh)) {
-                               memset(kaddr + i*blocksize, 0, blocksize);
-                       } else {
-                               ll_rw_block(READ, 1, &bh);
-                               wait_on_buffer(bh);
-                               err = -EIO;
-                               if (!buffer_uptodate(bh))
-                                       goto out;
-                       }
-               }
-
-               len = blocksize;
-               if (start_offset) {
-                       len = start_bytes;
-                       start_offset = 0;
-               } else if (end_bytes && (i == end_block)) {
-                       len = end_bytes;
-                       end_bytes = 0;
+               if (buffer_new(bh)) {
+                       zeroto = block_end;
+                       if (block_start < zerofrom)
+                               zerofrom = block_start;
+                       continue;
                }
-               if (target_buf >= kaddr + PAGE_SIZE)
-                       BUG();
-               if (target_buf+len-1 >= kaddr + PAGE_SIZE)
-                       BUG();
-               err = copy_from_user(target_buf, buf, len);
-               target_buf += len;
-               buf += len;
-
-               /*
-                * we dirty buffers only after copying the data into
-                * the page - this way we can dirty the buffer even if
-                * the bh is still doing IO.
-                *
-                * NOTE! This also does a direct dirty balace check,
-                * rather than relying on bdflush just waking up every
-                * once in a while. This is to catch (and slow down)
-                * the processes that write tons of buffer..
-                *
-                * Note how we do NOT want to do this in the full block
-                * case: full pages are flushed not by the people who
-                * dirtied them, but by people who need memory. And we
-                * should not penalize them for somebody else writing
-                * lots of dirty pages.
-                */
-               set_bit(BH_Uptodate, &bh->b_state);
-               if (!test_and_set_bit(BH_Dirty, &bh->b_state)) {
-                       __mark_dirty(bh, 0);
-                       need_balance_dirty = 1;
+               if (!buffer_uptodate(bh) &&
+                    (block_start < zerofrom || block_end > to)) {
+                       ll_rw_block(READ, 1, &bh);
+                       *wait_bh++=bh;
                }
-
-               if (err) {
-                       err = -EFAULT;
+       }
+       /*
+        * If we issued read requests - let them complete.
+        */
+       while(wait_bh > wait) {
+               wait_on_buffer(*--wait_bh);
+               err = -EIO;
+               if (!buffer_uptodate(*wait_bh))
                        goto out;
+       }
+       /*
+        * Now we can copy the data.
+        */
+       if (zerofrom < from)
+               memset(kaddr+zerofrom, 0, from-zerofrom);
+       if (from < to)
+               err = copy_from_user(kaddr+from, buf, to-from);
+       if (to < zeroto)
+               memset(kaddr+to, 0, zeroto-to);
+       else
+               zeroto = to;
+       if (err < 0)
+               goto out;
+       /*
+        * Second pass: check if all out-of-range blocks are up-to-date
+        * and mark the rest up-to-date and dirty.
+        *
+        * NOTE! This also does a direct dirty balace check,
+        * rather than relying on bdflush just waking up every
+        * once in a while. This is to catch (and slow down)
+        * the processes that write tons of buffer..
+        *
+        * Note how we do NOT want to do this in the full block
+        * case: full pages are flushed not by the people who
+        * dirtied them, but by people who need memory. And we
+        * should not penalize them for somebody else writing
+        * lots of dirty pages.
+        */
+       for(bh = head, block_start = 0;
+           bh != head || !block_start;
+           block_start=block_end, bh = bh->b_this_page) {
+               block_end = block_start + blocksize;
+               if (block_end <= zerofrom || block_start >= zeroto) {
+                       if (!buffer_uptodate(bh))
+                               partial = 1;
+               } else {
+                       set_bit(BH_Uptodate, &bh->b_state);
+                       if (!test_and_set_bit(BH_Dirty, &bh->b_state)) {
+                               __mark_dirty(bh, 0);
+                               need_balance_dirty = 1;
+                       }
                }
-
-skip:
-               i++;
-               block++;
-               bh = bh->b_this_page;
-       } while (bh != head);
+       }
 
        if (need_balance_dirty)
                balance_dirty(bh->b_dev);
-
        /*
         * is this a partial write that happened to make all buffers
         * uptodate then we can optimize away a bogus readpage() for
@@ -1528,183 +1495,49 @@
        if (!partial)
                SetPageUptodate(page);
        kunmap(page);
-       return bytes;
+       return 0;
 out:
        ClearPageUptodate(page);
        kunmap(page);
        return err;
 }
 
-/*
- * For moronic filesystems that do not allow holes in file.
- * we allow offset==PAGE_SIZE, bytes==0
- */
-
-int block_write_cont_page(struct file *file, struct page *page, unsigned long offset, 
unsigned long bytes, const char * buf)
+int block_write_partial_page(struct file *file, struct page *page, unsigned long 
+offset, unsigned long bytes, const char * buf)
 {
        struct dentry *dentry = file->f_dentry;
-       struct inode *inode = dentry->d_inode;
-       unsigned long block;
-       int err, partial;
-       unsigned long blocksize, start_block, end_block;
-       unsigned long start_offset, start_bytes, end_bytes;
-       unsigned long bbits, blocks, i, len;
-       struct buffer_head *bh, *head;
-       char * target_buf, *target_data;
-       unsigned long data_offset = offset;
-       int need_balance_dirty;
-
-       offset = inode->i_size - (page->index << PAGE_CACHE_SHIFT);
-       if (page->index > (inode->i_size >> PAGE_CACHE_SHIFT))
-               offset = 0;
-       else if (offset >= data_offset)
-               offset = data_offset;
-       bytes += data_offset - offset;
-
-       target_buf = (char *)page_address(page) + offset;
-       target_data = (char *)page_address(page) + data_offset;
+       int err;
 
        if (!PageLocked(page))
                BUG();
-
-       blocksize = inode->i_sb->s_blocksize;
-       if (!page->buffers)
-               create_empty_buffers(page, inode, blocksize);
-       head = page->buffers;
-
-       bbits = inode->i_sb->s_blocksize_bits;
-       block = page->index << (PAGE_CACHE_SHIFT - bbits);
-       blocks = PAGE_CACHE_SIZE >> bbits;
-       start_block = offset >> bbits;
-       end_block = (offset + bytes - 1) >> bbits;
-       start_offset = offset & (blocksize - 1);
-       start_bytes = blocksize - start_offset;
-       if (start_bytes > bytes)
-               start_bytes = bytes;
-       end_bytes = (offset+bytes) & (blocksize - 1);
-       if (end_bytes > bytes)
-               end_bytes = bytes;
-
-       if (offset < 0 || offset > PAGE_SIZE)
+       if (offset < 0 || offset >= PAGE_SIZE)
                BUG();
        if (bytes+offset < 0 || bytes+offset > PAGE_SIZE)
                BUG();
-       if (start_block < 0 || start_block > blocks)
-               BUG();
-       if (end_block < 0 || end_block >= blocks)
-               BUG();
-
-       i = 0;
-       bh = head;
-       partial = 0;
-       need_balance_dirty = 0;
-       do {
-               if (!bh)
-                       BUG();
-
-               if ((i < start_block) || (i > end_block)) {
-                       if (!buffer_uptodate(bh))
-                               partial = 1;
-                       goto skip;
-               }
-
-               /*
-                * If the buffer is not up-to-date, we need to ask the low-level
-                * FS to do something for us (we used to have assumptions about
-                * the meaning of b_blocknr etc, that's bad).
-                *
-                * If "update" is set, that means that the low-level FS should
-                * try to make sure that the block is up-to-date because we're
-                * not going to fill it completely.
-                */
-               bh->b_end_io = end_buffer_io_sync;
-               if (!buffer_mapped(bh)) {
-                       err = inode->i_op->get_block(inode, block, bh, 1);
-                       if (err)
-                               goto out;
-                       unmap_underlying_metadata(bh);
-               }
-
-               if (!buffer_uptodate(bh) && (start_offset || (end_bytes && (i == 
end_block)))) {
-                       if (buffer_new(bh)) {
-                               memset(bh->b_data, 0, bh->b_size);
-                       } else {
-                               ll_rw_block(READ, 1, &bh);
-                               wait_on_buffer(bh);
-                               err = -EIO;
-                               if (!buffer_uptodate(bh))
-                                       goto out;
-                       }
-               }
-
-               len = blocksize;
-               if (start_offset) {
-                       len = start_bytes;
-                       start_offset = 0;
-               } else if (end_bytes && (i == end_block)) {
-                       len = end_bytes;
-                       end_bytes = 0;
-               }
-               err = 0;
-               if (target_buf+len<=target_data)
-                       memset(target_buf, 0, len);
-               else if (target_buf<target_data) {
-                       memset(target_buf, 0, target_data-target_buf);
-                       copy_from_user(target_data, buf,
-                                       len+target_buf-target_data);
-               } else
-                       err = copy_from_user(target_buf, buf, len);
-               target_buf += len;
-               buf += len;
 
-               /*
-                * we dirty buffers only after copying the data into
-                * the page - this way we can dirty the buffer even if
-                * the bh is still doing IO.
-                *
-                * NOTE! This also does a direct dirty balace check,
-                * rather than relying on bdflush just waking up every
-                * once in a while. This is to catch (and slow down)
-                * the processes that write tons of buffer..
-                *
-                * Note how we do NOT want to do this in the full block
-                * case: full pages are flushed not by the people who
-                * dirtied them, but by people who need memory. And we
-                * should not penalize them for somebody else writing
-                * lots of dirty pages.
-                */
-               set_bit(BH_Uptodate, &bh->b_state);
-               if (!test_and_set_bit(BH_Dirty, &bh->b_state)) {
-                       __mark_dirty(bh, 0);
-                       need_balance_dirty = 1;
-               }
-
-               if (err) {
-                       err = -EFAULT;
-                       goto out;
-               }
+       err = block_write_range(dentry, page, offset, bytes, buf);
+       return err ? err : bytes;
+}
 
-skip:
-               i++;
-               block++;
-               bh = bh->b_this_page;
-       } while (bh != head);
+/*
+ * For moronic filesystems that do not allow holes in file.
+ * we allow offset==PAGE_SIZE, bytes==0
+ */
 
-       if (need_balance_dirty)
-               balance_dirty(bh->b_dev);
+int block_write_cont_page(struct file *file, struct page *page, unsigned long offset, 
+unsigned long bytes, const char * buf)
+{
+       struct dentry *dentry = file->f_dentry;
+       struct inode *inode = dentry->d_inode;
+       int err;
+       unsigned zerofrom = offset;
 
-       /*
-        * is this a partial write that happened to make all buffers
-        * uptodate then we can optimize away a bogus readpage() for
-        * the next read(). Here we 'discover' wether the page went
-        * uptodate as a result of this (potentially partial) write.
-        */
-       if (!partial)
-               SetPageUptodate(page);
-       return bytes;
-out:
-       ClearPageUptodate(page);
-       return err;
+       if (page->index > (inode->i_size >> PAGE_CACHE_SHIFT))
+               zerofrom = 0;
+       else if (page->index == (inode->i_size >> PAGE_CACHE_SHIFT) &&
+                offset > (inode->i_size & ~PAGE_CACHE_MASK))
+               zerofrom = inode->i_size & ~PAGE_CACHE_MASK;
+       err = block_write_zero_range(dentry, page, zerofrom,offset,offset+bytes,
+                                       buf);
+       return err ? err : bytes;
 }
 
 
--- linux-2.3.30-pre3/include/linux/fs.h        Tue Nov 30 20:44:54 1999
+++ linux-bird.symlink/include/linux/fs.h       Mon Nov 29 17:40:24 1999
@@ -948,6 +948,9 @@
 extern int block_write_full_page (struct dentry *, struct page *);
 extern int block_write_partial_page (struct file *, struct page *, unsigned long, 
unsigned long, const char *);
 extern int block_write_cont_page (struct file *, struct page *, unsigned long, 
unsigned long, const char *);
+extern int block_write_zero_range(struct dentry *, struct page *, unsigned, unsigned, 
+unsigned, const char *);
+#define block_write_range(dentry, page, from, len, buf) \
+       block_write_zero_range(dentry, page, from, from, from+len, buf)
 extern int block_flushpage(struct inode *, struct page *, unsigned long);
 
 extern int generic_file_mmap(struct file *, struct vm_area_struct *);
--- linux-2.3.30-pre3/kernel/ksyms.c    Sun Nov 28 01:35:47 1999
+++ linux-bird.symlink/kernel/ksyms.c   Mon Nov 29 17:37:55 1999
@@ -184,6 +184,7 @@
 EXPORT_SYMBOL(block_write_full_page);
 EXPORT_SYMBOL(block_write_partial_page);
 EXPORT_SYMBOL(block_write_cont_page);
+EXPORT_SYMBOL(block_write_zero_range);
 EXPORT_SYMBOL(block_flushpage);
 EXPORT_SYMBOL(generic_file_read);
 EXPORT_SYMBOL(do_generic_file_read);

Reply via email to