On Tue, Nov 06, 2018 at 02:41:13PM +0800, Lu Fengqi wrote:
> From: Wang Xiaoguang <wangxg.f...@cn.fujitsu.com>
>
> Introduce static function inmem_del() to remove hash from in-memory
> dedupe tree.
> And implement btrfs_dedupe_del() and btrfs_dedup_disable() interfaces.
>
> Also for btrfs_dedupe_disable(), add new functions to wait existing
> writer and block incoming writers to eliminate all possible race.
>
> Cc: Mark Fasheh <mfas...@suse.de>
> Signed-off-by: Qu Wenruo <quwen...@cn.fujitsu.com>
> Signed-off-by: Wang Xiaoguang <wangxg.f...@cn.fujitsu.com>
> Signed-off-by: Lu Fengqi <lufq.f...@cn.fujitsu.com>
> ---
>  fs/btrfs/dedupe.c | 131 +++++++++++++++++++++++++++++++++++++++++++---
>  1 file changed, 125 insertions(+), 6 deletions(-)
>
> diff --git a/fs/btrfs/dedupe.c b/fs/btrfs/dedupe.c
> index 784bb3a8a5ab..951fefd19fde 100644
> --- a/fs/btrfs/dedupe.c
> +++ b/fs/btrfs/dedupe.c
> @@ -170,12 +170,6 @@ int btrfs_dedupe_enable(struct btrfs_fs_info *fs_info,
>  	return ret;
>  }
>
> -int btrfs_dedupe_disable(struct btrfs_fs_info *fs_info)
> -{
> -	/* Place holder for bisect, will be implemented in later patches */
> -	return 0;
> -}
> -
>  static int inmem_insert_hash(struct rb_root *root,
>  			     struct inmem_hash *hash, int hash_len)
>  {
> @@ -317,3 +311,128 @@ int btrfs_dedupe_add(struct btrfs_fs_info *fs_info,
>  		return inmem_add(dedupe_info, hash);
>  	return -EINVAL;
>  }
> +
> +static struct inmem_hash *
> +inmem_search_bytenr(struct btrfs_dedupe_info *dedupe_info, u64 bytenr)
> +{
> +	struct rb_node **p = &dedupe_info->bytenr_root.rb_node;
> +	struct rb_node *parent = NULL;
> +	struct inmem_hash *entry = NULL;
> +
> +	while (*p) {
> +		parent = *p;
> +		entry = rb_entry(parent, struct inmem_hash, bytenr_node);
> +
> +		if (bytenr < entry->bytenr)
> +			p = &(*p)->rb_left;
> +		else if (bytenr > entry->bytenr)
> +			p = &(*p)->rb_right;
> +		else
> +			return entry;
> +	}
> +
> +	return NULL;
> +}
> +
> +/* Delete a hash from in-memory dedupe tree */
> +static int inmem_del(struct btrfs_dedupe_info *dedupe_info, u64 bytenr)
> +{
> +	struct inmem_hash *hash;
> +
> +	mutex_lock(&dedupe_info->lock);
> +	hash = inmem_search_bytenr(dedupe_info, bytenr);
> +	if (!hash) {
> +		mutex_unlock(&dedupe_info->lock);
> +		return 0;
> +	}
> +
> +	__inmem_del(dedupe_info, hash);
> +	mutex_unlock(&dedupe_info->lock);
> +	return 0;
> +}
> +
> +/* Remove a dedupe hash from dedupe tree */
> +int btrfs_dedupe_del(struct btrfs_fs_info *fs_info, u64 bytenr)
> +{
> +	struct btrfs_dedupe_info *dedupe_info = fs_info->dedupe_info;
> +
> +	if (!fs_info->dedupe_enabled)
> +		return 0;
> +
> +	if (WARN_ON(dedupe_info == NULL))
> +		return -EINVAL;
> +
> +	if (dedupe_info->backend == BTRFS_DEDUPE_BACKEND_INMEMORY)
> +		return inmem_del(dedupe_info, bytenr);
> +	return -EINVAL;
> +}
> +
> +static void inmem_destroy(struct btrfs_dedupe_info *dedupe_info)
> +{
> +	struct inmem_hash *entry, *tmp;
> +
> +	mutex_lock(&dedupe_info->lock);
> +	list_for_each_entry_safe(entry, tmp, &dedupe_info->lru_list, lru_list)
> +		__inmem_del(dedupe_info, entry);
> +	mutex_unlock(&dedupe_info->lock);
> +}
> +
> +/*
> + * Helper function to wait and block all incoming writers
> + *
> + * Use rw_sem introduced for freeze to wait/block writers.
> + * So during the block time, no new write will happen, so we can
> + * do something quite safe, espcially helpful for dedupe disable,
> + * as it affect buffered write.
> + */
> +static void block_all_writers(struct btrfs_fs_info *fs_info)
> +{
> +	struct super_block *sb = fs_info->sb;
> +
> +	percpu_down_write(sb->s_writers.rw_sem + SB_FREEZE_WRITE - 1);
> +	down_write(&sb->s_umount);
> +}
> +
> +static void unblock_all_writers(struct btrfs_fs_info *fs_info)
> +{
> +	struct super_block *sb = fs_info->sb;
> +
> +	up_write(&sb->s_umount);
> +	percpu_up_write(sb->s_writers.rw_sem + SB_FREEZE_WRITE - 1);
> +}
Please use the sb_ helpers, don't open code this.

> +
> +int btrfs_dedupe_disable(struct btrfs_fs_info *fs_info)
> +{
> +	struct btrfs_dedupe_info *dedupe_info;
> +	int ret;
> +
> +	dedupe_info = fs_info->dedupe_info;
> +
> +	if (!dedupe_info)
> +		return 0;
> +
> +	/* Don't allow disable status change in RO mount */
> +	if (fs_info->sb->s_flags & MS_RDONLY)
> +		return -EROFS;
> +
> +	/*
> +	 * Wait for all unfinished writers and block further writers.
> +	 * Then sync the whole fs so all current write will go through
> +	 * dedupe, and all later write won't go through dedupe.
> +	 */
> +	block_all_writers(fs_info);
> +	ret = sync_filesystem(fs_info->sb);
> +	fs_info->dedupe_enabled = 0;
> +	fs_info->dedupe_info = NULL;
> +	unblock_all_writers(fs_info);

This is awful, don't do this.  Thanks,

Josef