Sorry, this patch has a fault. Block reservation should not be performed if tux3_get_block_delay() returns a mapped buffer.
I'll re-send it tomorrow morning. On Thu, Jan 15, 2009 at 10:25 PM, Liu Hui <[email protected]> wrote: > I think delay allocation is not only just for allocating blocks or > extents at write back time but also used to merge block allocation > operations which will reduce allocation times and disk fragementation. Yep. Delayed allocation reduces buffered-write latency and disk fragmentation as well. > > IMHO, this patch indeed delays the allocation operations but not > really understand the motive of delay allocations. Thanks. It is a very simple implementation that uses the nobh routines to defer block allocation and a private page flag to mark reserved space. > > > 2009/1/15 Liu XiaoFeng <[email protected]>: > > Here is a simple implementation of delayed allocation for Tux3. > > > > Delayed allocation defers block allocation from > prepare-write(write-begin) > > time to page writeback time. It is a powerful technique and implemented > by > > several filesystems such as XFS, ext4, and btrfs. > > > > Unlike ext4's delalloc, this implementation is independent with extent > tree > > structure. > > > > Signed-off by XiaoFeng Liu. 
> > > > --- > > > > balloc.c | 1 > > filemap.c | 108 > > ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ > > inode.c | 2 - > > super.c | 7 ++++ > > trace.h | 11 ++++++ > > tux3.h | 30 +++++++++++++++++ > > 6 files changed, 158 insertions(+), 1 deletion(-) > > > > > > diff -pNur tux-orig/balloc.c tux-hack/balloc.c > > --- tux-orig/balloc.c 2009-01-14 20:00:22.000000000 +0800 > > +++ tux-hack/balloc.c 2009-01-15 14:29:26.000000000 +0800 > > @@ -281,6 +281,7 @@ int bfree(struct sb *sb, block_t start, > > clear_bits(bufdata(buffer), start, blocks); > > brelse_dirty(buffer); > > sb->freeblocks += blocks; > > + tux3_release_blocks(sb, blocks); > > //set_sb_dirty(sb); > > mutex_unlock(&sb->bitmap->i_mutex); > > return 0; > > diff -pNur tux-orig/filemap.c tux-hack/filemap.c > > --- tux-orig/filemap.c 2009-01-14 20:00:22.000000000 +0800 > > +++ tux-hack/filemap.c 2009-01-15 14:25:07.000000000 +0800 > > @@ -505,4 +505,112 @@ const struct address_space_operations tu > > .sync_page = block_sync_page, > > .write_begin = tux3_vol_write_begin, > > }; > > + > > + > > +/* > > + * Tux3's delayed allocation > > + * Note: support blocksize == pagesize only > > + * Written by XiaoFeng LIU <[email protected]> > > + */ > > + > > +/* proof of concept */ > > +#define NR_RESERV_BLOCKS 32 > > + > > +static int tux3_da_reserve_blocks(struct super_block *sb, int count) > > +{ > > + long free_blocks; > > + struct sb *sbi = tux_sb(sb); > > + free_blocks = > percpu_counter_read_positive(freeblocks_counter(sbi)); > > + xtrace("freeblocks_counter %ld", free_blocks); > > + > > + if (free_blocks < count + NR_RESERV_BLOCKS) > > + return -ENOSPC; > > + percpu_counter_sub(freeblocks_counter(sbi), count); > > + return 0; > > +} > > + > > +static void tux3_da_release_blocks(struct super_block *sb, int count) > > +{ > > + struct sb *sbi = tux_sb(sb); > > + if (count) { > > + percpu_counter_add(freeblocks_counter(sbi), count); > > + sb->s_dirt = 1; > > + } > > +} > > + > > +static int 
tux3_get_block_delay(struct inode *inode, sector_t iblock, > > + struct buffer_head *bh_rslt, int create) > > +{ > > + return tux3_get_block(inode, iblock, bh_rslt, 0); > > +} > > + > > +/* > > + * a get_block() called at the writeout time. > > + */ > > +static int tux3_get_block_write(struct inode *inode, sector_t iblock, > > + struct buffer_head *bh_rslt, int create) > > +{ > > + pgoff_t index = (pgoff_t) (iblock >> (PAGE_CACHE_SHIFT - > > inode->i_blkbits)); > > + struct page *page = find_get_page(inode->i_mapping, index); > > + > > + /* the page should be here, and dirty */ > > + if (unlikely(!page)) { > > + xtrace("find_get_page ret NULL."); > > + goto out; > > + } > > + if (create && PageChecked(page)) { > > + ClearPageChecked(page); > > + tux3_da_release_blocks(inode->i_sb, 1); > > + } > > + if (page) > > + page_cache_release(page); > > + > > +out: > > + return tux3_get_block(inode, iblock, bh_rslt, create); > > +} > > + > > +static int tux3_da_write_begin(struct file *file, struct address_space > > *mapping, > > + loff_t pos, unsigned len, unsigned flags, > > + struct page **pagep, void **fsdata) > > +{ > > + return nobh_write_begin(file, mapping, pos, len, flags, pagep, > fsdata, > > + tux3_get_block_delay); > > +} > > + > > +static int tux3_da_write_end(struct file *file, struct address_space > > *mapping, > > + loff_t pos, unsigned len, unsigned copied, > > + struct page *page, void *fsdata) > > +{ > > + if (!PageChecked(page)) { > > + int ret = tux3_da_reserve_blocks(mapping->host->i_sb, 1); > > + if (ret) > > + return ret; > > + SetPageChecked(page); > > + } > > + return nobh_write_end(file, mapping, pos, len, copied, page, > > fsdata); > > +} > > + > > +static int tux3_da_writepage(struct page *page, struct writeback_control > > *wbc) > > +{ > > + return nobh_writepage(page, tux3_get_block_write, wbc); > > +} > > +static int tux3_da_writepages(struct address_space *mapping, > > + struct writeback_control *wbc) > > +{ > > + return 
mpage_writepages(mapping, wbc, tux3_get_block_write); > > +} > > + > > +const struct address_space_operations tux_da_aops = { > > + .readpage = tux3_readpage, > > + .readpages = tux3_readpages, > > + .writepage = tux3_da_writepage, > > + .writepages = tux3_da_writepages, > > + .sync_page = block_sync_page, > > + .write_begin = tux3_da_write_begin, > > + .write_end = tux3_da_write_end, > > + .bmap = tux3_bmap, > > + .direct_IO = tux3_direct_IO, > > + .migratepage = buffer_migrate_page, > > +}; > > + > > #endif /* __KERNEL__ */ > > diff -pNur tux-orig/inode.c tux-hack/inode.c > > --- tux-orig/inode.c 2009-01-14 20:00:22.000000000 +0800 > > +++ tux-hack/inode.c 2009-01-15 15:29:43.000000000 +0800 > > @@ -438,7 +438,7 @@ static void tux_setup_inode(struct inode > > case S_IFREG: > > inode->i_op = &tux_file_iops; > > inode->i_fop = &tux_file_fops; > > - inode->i_mapping->a_ops = &tux_aops; > > + inode->i_mapping->a_ops = &tux_da_aops; > > break; > > case S_IFDIR: > > inode->i_op = &tux_dir_iops; > > diff -pNur tux-orig/super.c tux-hack/super.c > > --- tux-orig/super.c 2009-01-14 20:00:22.000000000 +0800 > > +++ tux-hack/super.c 2009-01-15 14:27:53.000000000 +0800 > > @@ -106,6 +106,9 @@ static void tux3_put_super(struct super_ > > iput(sbi->volmap); > > iput(sbi->logmap); > > > > + /* destroy block allocation info */ > > + tux3_balloc_info_destroy(sbi); > > + > > sb->s_fs_info = NULL; > > kfree(sbi); > > } > > @@ -172,6 +175,10 @@ static int tux3_fill_super(struct super_ > > err = tux_load_sb(sb, silent); > > if (err) > > goto error; > > + > > + /* initialize block allocation info */ > > + tux3_balloc_info_init(sbi); > > + > > printk("%s: sb %p, ops %p, depth %Lu, block %Lu, entries_per_leaf > > %d\n", > > __func__, > > sbi->itable.sb, sbi->itable.ops, > > diff -pNur tux-orig/trace.h tux-hack/trace.h > > --- tux-orig/trace.h 2009-01-14 20:00:22.000000000 +0800 > > +++ tux-hack/trace.h 2009-01-15 15:04:49.000000000 +0800 > > @@ -22,4 +22,15 @@ > > die(100); \ > > } while 
(0) > > > > + > > +#ifdef __KERNEL__ > > +/* debug macro, xiaofeng */ > > +#define xtrace(f, a...) { \ > > + printk ("(%s, %d): %s:", \ > > + __FILE__, __LINE__, __FUNCTION__); \ > > + printk (f, ## a); \ > > + printk ("\n"); \ > > + } > > + > > +#endif > > #endif > > diff -pNur tux-orig/tux3.h tux-hack/tux3.h > > --- tux-orig/tux3.h 2009-01-14 20:00:22.000000000 +0800 > > +++ tux-hack/tux3.h 2009-01-15 15:34:37.000000000 +0800 > > @@ -9,6 +9,8 @@ > > #include <linux/fs.h> > > #include <linux/buffer_head.h> > > #include <linux/mutex.h> > > +#include <linux/mm.h> > > +#include <linux/percpu_counter.h> > > > > #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,27) > > #include <linux/cred.h> // fsuid > > @@ -213,6 +215,13 @@ struct cursor { > > } path[]; > > }; > > > > +/* Tux3 block allocation information */ > > +struct tux3_balloc_info { > > + struct percpu_counter freeblocks_counter; > > + /* nextalloc_counter, and others */ > > +}; > > +#define freeblocks_counter(sbi) > (&sbi->balloc_info.freeblocks_counter) > > + > > /* Tux3-specific sb is a handle for the entire volume state */ > > > > struct sb { > > @@ -241,6 +250,7 @@ struct sb { > > struct mutex loglock; /* serialize log entries (spinlock me) */ > > #ifdef __KERNEL__ > > struct super_block *vfs_sb; /* Generic kernel superblock */ > > + struct tux3_balloc_info balloc_info; /* control info for block > > allocation */ > > #else > > struct dev *dev; /* userspace block device */ > > #endif > > @@ -620,6 +630,25 @@ static inline struct inode *buffer_inode > > return buffer->b_page->mapping->host; > > } > > > > +static inline void tux3_balloc_info_init(struct sb* sbi) > > +{ > > + percpu_counter_init(freeblocks_counter(sbi), sbi->freeblocks); > > +} > > +static inline void tux3_balloc_info_destroy(struct sb* sbi) > > +{ > > + percpu_counter_destroy(freeblocks_counter(sbi)); > > +} > > + > > +static inline void tux3_release_blocks(struct sb* sbi, int count) > > +{ > > + percpu_counter_add(freeblocks_counter(sbi), count); 
> > +} > > + > > +static inline void tux3_reserve_blocks(struct sb* sbi, int count) > > +{ > > + percpu_counter_sub(freeblocks_counter(sbi), count); > > +} > > + > > /* btree.c */ > > struct buffer_head *cursor_leafbuf(struct cursor *cursor); > > void release_cursor(struct cursor *cursor); > > @@ -678,6 +707,7 @@ int tux3_get_block(struct inode *inode, > > extern const struct address_space_operations tux_aops; > > extern const struct address_space_operations tux_blk_aops; > > extern const struct address_space_operations tux_vol_aops; > > +extern const struct address_space_operations tux_da_aops; > > > > /* iattr.c */ > > unsigned encode_asize(unsigned bits); > > > > > > _______________________________________________ > > Tux3 mailing list > > [email protected] > > http://mailman.tux3.org/cgi-bin/mailman/listinfo/tux3 > > > > > -- > Thanks & Best Regards > Liu Hui > -- >
_______________________________________________ Tux3 mailing list [email protected] http://mailman.tux3.org/cgi-bin/mailman/listinfo/tux3
