Re: [f2fs-dev] [PATCH 1/2] f2fs: check only data or node for summary

2016-07-05 Thread He YunLei

On 2016/6/11 5:01, Jaegeuk Kim wrote:

We can check only data or node types for gc, since we occasionally allocate
different types of data/node blocks in different logs.

Signed-off-by: Jaegeuk Kim 
---
  fs/f2fs/gc.c | 3 ++-
  1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
index e1d274c..c2c4ac3 100644
--- a/fs/f2fs/gc.c
+++ b/fs/f2fs/gc.c
@@ -806,7 +806,8 @@ static int do_garbage_collect(struct f2fs_sb_info *sbi,
f2fs_put_page(sum_page, 0);

sum = page_address(sum_page);
-   f2fs_bug_on(sbi, type != GET_SUM_TYPE((&sum->footer)));
+   f2fs_bug_on(sbi, IS_DATASEG(type) !=
+   IS_DATASEG(GET_SUM_TYPE((&sum->footer))));


Hi Kim,
type has already been converted to SUM_TYPE_DATA or SUM_TYPE_NODE, so there is
no need to do this here.

Thanks


/*
 * this is to avoid deadlock:





Re: [f2fs-dev] [PATCH 1/2] f2fs: check only data or node for summary

2016-07-05 Thread He YunLei

On 2016/6/11 5:01, Jaegeuk Kim wrote:

We can check only data or node types for gc, since we occasionally allocate
different types of data/node blocks in different logs.

Signed-off-by: Jaegeuk Kim 
---
  fs/f2fs/gc.c | 3 ++-
  1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
index e1d274c..c2c4ac3 100644
--- a/fs/f2fs/gc.c
+++ b/fs/f2fs/gc.c
@@ -806,7 +806,8 @@ static int do_garbage_collect(struct f2fs_sb_info *sbi,
f2fs_put_page(sum_page, 0);

sum = page_address(sum_page);
-   f2fs_bug_on(sbi, type != GET_SUM_TYPE((&sum->footer)));
+   f2fs_bug_on(sbi, IS_DATASEG(type) !=
+   IS_DATASEG(GET_SUM_TYPE((&sum->footer))));


Hi Kim,
type has already been converted to SUM_TYPE_DATA or SUM_TYPE_NODE, so there is
no need to do this here.

Thanks


/*
 * this is to avoid deadlock:





Re: [f2fs-dev] [PATCH 1/4] f2fs: propagate error given by f2fs_find_entry

2016-05-26 Thread He YunLei

On 2016/5/27 8:25, Jaegeuk Kim wrote:

If we get ENOMEM or EIO in f2fs_find_entry, we should stop right away.
Otherwise, for example, we can get duplicate directory entry by ->chash and
->clevel.

Signed-off-by: Jaegeuk Kim 
---
  fs/f2fs/dir.c| 23 ---
  fs/f2fs/inline.c |  4 +++-
  fs/f2fs/namei.c  |  5 +
  3 files changed, 24 insertions(+), 8 deletions(-)

diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c
index 24d1308..ae37543 100644
--- a/fs/f2fs/dir.c
+++ b/fs/f2fs/dir.c
@@ -185,8 +185,13 @@ static struct f2fs_dir_entry *find_in_level(struct inode 
*dir,
/* no need to allocate new dentry pages to all the indices */
dentry_page = find_data_page(dir, bidx);
if (IS_ERR(dentry_page)) {
-   room = true;
-   continue;
+   if (PTR_ERR(dentry_page) == -ENOENT) {
+   room = true;
+   continue;
+   } else {
+   *res_page = dentry_page;
+   break;
+   }
}

de = find_in_block(dentry_page, fname, namehash, _slots,
@@ -223,19 +228,22 @@ struct f2fs_dir_entry *f2fs_find_entry(struct inode *dir,
struct fscrypt_name fname;
int err;

-   *res_page = NULL;
-
err = fscrypt_setup_filename(dir, child, 1, );
-   if (err)
+   if (err) {
+   *res_page = ERR_PTR(-ENOMEM);
return NULL;
+   }

if (f2fs_has_inline_dentry(dir)) {
+   *res_page = NULL;
de = find_in_inline_dir(dir, , res_page);
goto out;
}

-   if (npages == 0)
+   if (npages == 0) {
+   *res_page = NULL;
goto out;
+   }

max_depth = F2FS_I(dir)->i_current_depth;
if (unlikely(max_depth > MAX_DIR_HASH_DEPTH)) {
@@ -247,8 +255,9 @@ struct f2fs_dir_entry *f2fs_find_entry(struct inode *dir,
}

for (level = 0; level < max_depth; level++) {
+   *res_page = NULL;
de = find_in_level(dir, level, , res_page);
-   if (de)
+   if (de || IS_ERR(*res_page))
break;
}


Hi Kim,
Here we return NULL when find_data_page fails, which tells the VFS that
the looked-up file does not exist, but the file may actually exist
behind the block read error. So maybe we had better report the error to the VFS.

Thanks.


  out:
diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c
index 77c9c24..1eb3043 100644
--- a/fs/f2fs/inline.c
+++ b/fs/f2fs/inline.c
@@ -286,8 +286,10 @@ struct f2fs_dir_entry *find_in_inline_dir(struct inode 
*dir,
f2fs_hash_t namehash;

ipage = get_node_page(sbi, dir->i_ino);
-   if (IS_ERR(ipage))
+   if (IS_ERR(ipage)) {
+   *res_page = ipage;
return NULL;
+   }

namehash = f2fs_dentry_hash();

diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c
index 496f4e3..3f6119e 100644
--- a/fs/f2fs/namei.c
+++ b/fs/f2fs/namei.c
@@ -232,6 +232,9 @@ static int __recover_dot_dentries(struct inode *dir, nid_t 
pino)
if (de) {
f2fs_dentry_kunmap(dir, page);
f2fs_put_page(page, 0);
+   } else if (IS_ERR(page)) {
+   err = PTR_ERR(page);
+   goto out;
} else {
err = __f2fs_add_link(dir, , NULL, dir->i_ino, S_IFDIR);
if (err)
@@ -242,6 +245,8 @@ static int __recover_dot_dentries(struct inode *dir, nid_t 
pino)
if (de) {
f2fs_dentry_kunmap(dir, page);
f2fs_put_page(page, 0);
+   } else if (IS_ERR(page)) {
+   err = PTR_ERR(page);
} else {
err = __f2fs_add_link(dir, , NULL, pino, S_IFDIR);
}





Re: [f2fs-dev] [PATCH 1/4] f2fs: propagate error given by f2fs_find_entry

2016-05-26 Thread He YunLei

On 2016/5/27 8:25, Jaegeuk Kim wrote:

If we get ENOMEM or EIO in f2fs_find_entry, we should stop right away.
Otherwise, for example, we can get duplicate directory entry by ->chash and
->clevel.

Signed-off-by: Jaegeuk Kim 
---
  fs/f2fs/dir.c| 23 ---
  fs/f2fs/inline.c |  4 +++-
  fs/f2fs/namei.c  |  5 +
  3 files changed, 24 insertions(+), 8 deletions(-)

diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c
index 24d1308..ae37543 100644
--- a/fs/f2fs/dir.c
+++ b/fs/f2fs/dir.c
@@ -185,8 +185,13 @@ static struct f2fs_dir_entry *find_in_level(struct inode 
*dir,
/* no need to allocate new dentry pages to all the indices */
dentry_page = find_data_page(dir, bidx);
if (IS_ERR(dentry_page)) {
-   room = true;
-   continue;
+   if (PTR_ERR(dentry_page) == -ENOENT) {
+   room = true;
+   continue;
+   } else {
+   *res_page = dentry_page;
+   break;
+   }
}

de = find_in_block(dentry_page, fname, namehash, _slots,
@@ -223,19 +228,22 @@ struct f2fs_dir_entry *f2fs_find_entry(struct inode *dir,
struct fscrypt_name fname;
int err;

-   *res_page = NULL;
-
err = fscrypt_setup_filename(dir, child, 1, );
-   if (err)
+   if (err) {
+   *res_page = ERR_PTR(-ENOMEM);
return NULL;
+   }

if (f2fs_has_inline_dentry(dir)) {
+   *res_page = NULL;
de = find_in_inline_dir(dir, , res_page);
goto out;
}

-   if (npages == 0)
+   if (npages == 0) {
+   *res_page = NULL;
goto out;
+   }

max_depth = F2FS_I(dir)->i_current_depth;
if (unlikely(max_depth > MAX_DIR_HASH_DEPTH)) {
@@ -247,8 +255,9 @@ struct f2fs_dir_entry *f2fs_find_entry(struct inode *dir,
}

for (level = 0; level < max_depth; level++) {
+   *res_page = NULL;
de = find_in_level(dir, level, , res_page);
-   if (de)
+   if (de || IS_ERR(*res_page))
break;
}


Hi Kim,
Here we return NULL when find_data_page fails, which tells the VFS that
the looked-up file does not exist, but the file may actually exist
behind the block read error. So maybe we had better report the error to the VFS.

Thanks.


  out:
diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c
index 77c9c24..1eb3043 100644
--- a/fs/f2fs/inline.c
+++ b/fs/f2fs/inline.c
@@ -286,8 +286,10 @@ struct f2fs_dir_entry *find_in_inline_dir(struct inode 
*dir,
f2fs_hash_t namehash;

ipage = get_node_page(sbi, dir->i_ino);
-   if (IS_ERR(ipage))
+   if (IS_ERR(ipage)) {
+   *res_page = ipage;
return NULL;
+   }

namehash = f2fs_dentry_hash();

diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c
index 496f4e3..3f6119e 100644
--- a/fs/f2fs/namei.c
+++ b/fs/f2fs/namei.c
@@ -232,6 +232,9 @@ static int __recover_dot_dentries(struct inode *dir, nid_t 
pino)
if (de) {
f2fs_dentry_kunmap(dir, page);
f2fs_put_page(page, 0);
+   } else if (IS_ERR(page)) {
+   err = PTR_ERR(page);
+   goto out;
} else {
err = __f2fs_add_link(dir, , NULL, dir->i_ino, S_IFDIR);
if (err)
@@ -242,6 +245,8 @@ static int __recover_dot_dentries(struct inode *dir, nid_t 
pino)
if (de) {
f2fs_dentry_kunmap(dir, page);
f2fs_put_page(page, 0);
+   } else if (IS_ERR(page)) {
+   err = PTR_ERR(page);
} else {
err = __f2fs_add_link(dir, , NULL, pino, S_IFDIR);
}





Re: [f2fs-dev] [PATCH 1/2] f2fs: use atomic variable for total_extent_tree

2015-12-29 Thread He YunLei

On 2015/12/22 11:38, Jaegeuk Kim wrote:

It would be better to use atomic variable for total_extent_tree.

Signed-off-by: Jaegeuk Kim 
---
  fs/f2fs/debug.c| 5 +++--
  fs/f2fs/extent_cache.c | 8 
  fs/f2fs/f2fs.h | 2 +-
  fs/f2fs/node.c | 3 ++-
  fs/f2fs/shrinker.c | 3 ++-
  5 files changed, 12 insertions(+), 9 deletions(-)

diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c
index bb307e6..ed5dfcc 100644
--- a/fs/f2fs/debug.c
+++ b/fs/f2fs/debug.c
@@ -38,7 +38,7 @@ static void update_general_status(struct f2fs_sb_info *sbi)
si->hit_rbtree = atomic64_read(>read_hit_rbtree);
si->hit_total = si->hit_largest + si->hit_cached + si->hit_rbtree;
si->total_ext = atomic64_read(>total_hit_ext);
-   si->ext_tree = sbi->total_ext_tree;
+   si->ext_tree = atomic_read(>total_ext_tree);
si->ext_node = atomic_read(>total_ext_node);
si->ndirty_node = get_pages(sbi, F2FS_DIRTY_NODES);
si->ndirty_dent = get_pages(sbi, F2FS_DIRTY_DENTS);
@@ -193,7 +193,8 @@ get_cache:
si->cache_mem += si->inmem_pages * sizeof(struct inmem_pages);
for (i = 0; i <= UPDATE_INO; i++)
si->cache_mem += sbi->im[i].ino_num * sizeof(struct ino_entry);
-   si->cache_mem += sbi->total_ext_tree * sizeof(struct extent_tree);
+   si->cache_mem += atomic_read(>total_ext_tree) *
+   sizeof(struct extent_tree);
si->cache_mem += atomic_read(>total_ext_node) *
sizeof(struct extent_node);

diff --git a/fs/f2fs/extent_cache.c b/fs/f2fs/extent_cache.c
index e86e9f1e..0e97d6af 100644
--- a/fs/f2fs/extent_cache.c
+++ b/fs/f2fs/extent_cache.c
@@ -70,7 +70,7 @@ static struct extent_tree *__grab_extent_tree(struct inode 
*inode)
rwlock_init(>lock);
atomic_set(>refcount, 0);
et->count = 0;
-   sbi->total_ext_tree++;
+   atomic_inc(>total_ext_tree);
}
atomic_inc(>refcount);
up_write(>extent_tree_lock);
@@ -570,7 +570,7 @@ unsigned int f2fs_shrink_extent_tree(struct f2fs_sb_info 
*sbi, int nr_shrink)

radix_tree_delete(root, et->ino);
kmem_cache_free(extent_tree_slab, et);
-   sbi->total_ext_tree--;
+   atomic_dec(>total_ext_tree);
tree_cnt++;

if (node_cnt + tree_cnt >= nr_shrink)
@@ -663,7 +663,7 @@ void f2fs_destroy_extent_tree(struct inode *inode)
f2fs_bug_on(sbi, atomic_read(>refcount) || et->count);
radix_tree_delete(>extent_tree_root, inode->i_ino);
kmem_cache_free(extent_tree_slab, et);
-   sbi->total_ext_tree--;
+   atomic_dec(>total_ext_tree);
up_write(>extent_tree_lock);

F2FS_I(inode)->extent_tree = NULL;
@@ -715,7 +715,7 @@ void init_extent_cache_info(struct f2fs_sb_info *sbi)
init_rwsem(>extent_tree_lock);
INIT_LIST_HEAD(>extent_list);
spin_lock_init(>extent_lock);
-   sbi->total_ext_tree = 0;
+   atomic_set(>total_ext_tree, 0);

Hi,
here we had better also initialize total_zombie_tree:
atomic_set(&sbi->total_zombie_tree, 0);
Thanks,

atomic_set(>total_ext_node, 0);
  }

diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 19beabe..a7f6191 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -762,7 +762,7 @@ struct f2fs_sb_info {
struct rw_semaphore extent_tree_lock;   /* locking extent radix tree */
struct list_head extent_list;   /* lru list for shrinker */
spinlock_t extent_lock; /* locking extent lru list */
-   int total_ext_tree; /* extent tree count */
+   atomic_t total_ext_tree;/* extent tree count */
atomic_t total_ext_node;/* extent info count */

/* basic filesystem units */
diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
index d842b19..6cc8ac7 100644
--- a/fs/f2fs/node.c
+++ b/fs/f2fs/node.c
@@ -65,7 +65,8 @@ bool available_free_memory(struct f2fs_sb_info *sbi, int type)
sizeof(struct ino_entry)) >> PAGE_CACHE_SHIFT;
res = mem_size < ((avail_ram * nm_i->ram_thresh / 100) >> 1);
} else if (type == EXTENT_CACHE) {
-   mem_size = (sbi->total_ext_tree * sizeof(struct extent_tree) +
+   mem_size = (atomic_read(>total_ext_tree) *
+   sizeof(struct extent_tree) +
atomic_read(>total_ext_node) *
sizeof(struct extent_node)) >> PAGE_CACHE_SHIFT;
res = mem_size < ((avail_ram * nm_i->ram_thresh / 100) >> 1);
diff --git a/fs/f2fs/shrinker.c b/fs/f2fs/shrinker.c
index da0d8e0..a11e099 100644
--- a/fs/f2fs/shrinker.c
+++ b/fs/f2fs/shrinker.c
@@ -32,7 +32,8 @@ static 

Re: [f2fs-dev] [PATCH 1/2] f2fs: use atomic variable for total_extent_tree

2015-12-29 Thread He YunLei

On 2015/12/22 11:38, Jaegeuk Kim wrote:

It would be better to use atomic variable for total_extent_tree.

Signed-off-by: Jaegeuk Kim 
---
  fs/f2fs/debug.c| 5 +++--
  fs/f2fs/extent_cache.c | 8 
  fs/f2fs/f2fs.h | 2 +-
  fs/f2fs/node.c | 3 ++-
  fs/f2fs/shrinker.c | 3 ++-
  5 files changed, 12 insertions(+), 9 deletions(-)

diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c
index bb307e6..ed5dfcc 100644
--- a/fs/f2fs/debug.c
+++ b/fs/f2fs/debug.c
@@ -38,7 +38,7 @@ static void update_general_status(struct f2fs_sb_info *sbi)
si->hit_rbtree = atomic64_read(>read_hit_rbtree);
si->hit_total = si->hit_largest + si->hit_cached + si->hit_rbtree;
si->total_ext = atomic64_read(>total_hit_ext);
-   si->ext_tree = sbi->total_ext_tree;
+   si->ext_tree = atomic_read(>total_ext_tree);
si->ext_node = atomic_read(>total_ext_node);
si->ndirty_node = get_pages(sbi, F2FS_DIRTY_NODES);
si->ndirty_dent = get_pages(sbi, F2FS_DIRTY_DENTS);
@@ -193,7 +193,8 @@ get_cache:
si->cache_mem += si->inmem_pages * sizeof(struct inmem_pages);
for (i = 0; i <= UPDATE_INO; i++)
si->cache_mem += sbi->im[i].ino_num * sizeof(struct ino_entry);
-   si->cache_mem += sbi->total_ext_tree * sizeof(struct extent_tree);
+   si->cache_mem += atomic_read(>total_ext_tree) *
+   sizeof(struct extent_tree);
si->cache_mem += atomic_read(>total_ext_node) *
sizeof(struct extent_node);

diff --git a/fs/f2fs/extent_cache.c b/fs/f2fs/extent_cache.c
index e86e9f1e..0e97d6af 100644
--- a/fs/f2fs/extent_cache.c
+++ b/fs/f2fs/extent_cache.c
@@ -70,7 +70,7 @@ static struct extent_tree *__grab_extent_tree(struct inode 
*inode)
rwlock_init(>lock);
atomic_set(>refcount, 0);
et->count = 0;
-   sbi->total_ext_tree++;
+   atomic_inc(>total_ext_tree);
}
atomic_inc(>refcount);
up_write(>extent_tree_lock);
@@ -570,7 +570,7 @@ unsigned int f2fs_shrink_extent_tree(struct f2fs_sb_info 
*sbi, int nr_shrink)

radix_tree_delete(root, et->ino);
kmem_cache_free(extent_tree_slab, et);
-   sbi->total_ext_tree--;
+   atomic_dec(>total_ext_tree);
tree_cnt++;

if (node_cnt + tree_cnt >= nr_shrink)
@@ -663,7 +663,7 @@ void f2fs_destroy_extent_tree(struct inode *inode)
f2fs_bug_on(sbi, atomic_read(>refcount) || et->count);
radix_tree_delete(>extent_tree_root, inode->i_ino);
kmem_cache_free(extent_tree_slab, et);
-   sbi->total_ext_tree--;
+   atomic_dec(>total_ext_tree);
up_write(>extent_tree_lock);

F2FS_I(inode)->extent_tree = NULL;
@@ -715,7 +715,7 @@ void init_extent_cache_info(struct f2fs_sb_info *sbi)
init_rwsem(>extent_tree_lock);
INIT_LIST_HEAD(>extent_list);
spin_lock_init(>extent_lock);
-   sbi->total_ext_tree = 0;
+   atomic_set(>total_ext_tree, 0);

Hi,
here we had better also initialize total_zombie_tree:
atomic_set(&sbi->total_zombie_tree, 0);
Thanks,

atomic_set(>total_ext_node, 0);
  }

diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 19beabe..a7f6191 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -762,7 +762,7 @@ struct f2fs_sb_info {
struct rw_semaphore extent_tree_lock;   /* locking extent radix tree */
struct list_head extent_list;   /* lru list for shrinker */
spinlock_t extent_lock; /* locking extent lru list */
-   int total_ext_tree; /* extent tree count */
+   atomic_t total_ext_tree;/* extent tree count */
atomic_t total_ext_node;/* extent info count */

/* basic filesystem units */
diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
index d842b19..6cc8ac7 100644
--- a/fs/f2fs/node.c
+++ b/fs/f2fs/node.c
@@ -65,7 +65,8 @@ bool available_free_memory(struct f2fs_sb_info *sbi, int type)
sizeof(struct ino_entry)) >> PAGE_CACHE_SHIFT;
res = mem_size < ((avail_ram * nm_i->ram_thresh / 100) >> 1);
} else if (type == EXTENT_CACHE) {
-   mem_size = (sbi->total_ext_tree * sizeof(struct extent_tree) +
+   mem_size = (atomic_read(>total_ext_tree) *
+   sizeof(struct extent_tree) +
atomic_read(>total_ext_node) *
sizeof(struct extent_node)) >> PAGE_CACHE_SHIFT;
res = mem_size < ((avail_ram * nm_i->ram_thresh / 100) >> 1);
diff --git a/fs/f2fs/shrinker.c b/fs/f2fs/shrinker.c
index da0d8e0..a11e099 100644
--- a/fs/f2fs/shrinker.c
+++ b/fs/f2fs/shrinker.c
@@ -32,7 

Re: [f2fs-dev] [PATCH 2/2] f2fs: speed up shrinking extent tree entries

2015-12-22 Thread He YunLei

On 2015/12/22 13:20, Chao Yu wrote:

Hi Jaegeuk,

We should update total_zombie_tree whenever we remove an unreferenced
extent tree during shrinking:
- f2fs_shrink_extent_tree
if (!atomic_read(&et->refcount)) {
...
atomic_dec(&sbi->total_ext_tree);
atomic_dec(&sbi->total_zombie_tree);
...
}

Other parts look good to me. :)

Reviewed-by: Chao Yu 

Thanks,


-Original Message-
From: Jaegeuk Kim [mailto:jaeg...@kernel.org]
Sent: Tuesday, December 22, 2015 11:39 AM
To: linux-kernel@vger.kernel.org; linux-fsde...@vger.kernel.org;
linux-f2fs-de...@lists.sourceforge.net
Cc: Jaegeuk Kim
Subject: [f2fs-dev] [PATCH 2/2] f2fs: speed up shrinking extent tree entries

If there are no candidates for shrinking slab entries, we don't need to traverse
any trees at all.

Signed-off-by: Jaegeuk Kim 
---
  fs/f2fs/extent_cache.c | 12 
  fs/f2fs/f2fs.h |  1 +
  fs/f2fs/shrinker.c |  2 +-
  3 files changed, 14 insertions(+), 1 deletion(-)

diff --git a/fs/f2fs/extent_cache.c b/fs/f2fs/extent_cache.c
index 0e97d6af..32693af 100644
--- a/fs/f2fs/extent_cache.c
+++ b/fs/f2fs/extent_cache.c
@@ -71,6 +71,8 @@ static struct extent_tree *__grab_extent_tree(struct inode 
*inode)
atomic_set(>refcount, 0);
et->count = 0;
atomic_inc(>total_ext_tree);
+   } else {
+   atomic_dec(>total_zombie_tree);
}
atomic_inc(>refcount);
up_write(>extent_tree_lock);
@@ -547,10 +549,14 @@ unsigned int f2fs_shrink_extent_tree(struct f2fs_sb_info 
*sbi, int
nr_shrink)
unsigned int found;
unsigned int node_cnt = 0, tree_cnt = 0;
int remained;
+   bool do_free = false;

if (!test_opt(sbi, EXTENT_CACHE))
return 0;

+   if (!atomic_read(>total_zombie_tree))
+   goto free_node;
+
if (!down_write_trylock(>extent_tree_lock))
goto out;

@@ -580,6 +586,7 @@ unsigned int f2fs_shrink_extent_tree(struct f2fs_sb_info 
*sbi, int
nr_shrink)
}
up_write(>extent_tree_lock);

+free_node:
/* 2. remove LRU extent entries */
if (!down_write_trylock(>extent_tree_lock))
goto out;
@@ -591,9 +598,13 @@ unsigned int f2fs_shrink_extent_tree(struct f2fs_sb_info 
*sbi, int
nr_shrink)
if (!remained--)
break;
list_del_init(>list);
+   do_free = true;
}
spin_unlock(>extent_lock);

+   if (do_free == false)
+   goto unlock_out;
+
/*
 * reset ino for searching victims from beginning of global extent tree.
 */
@@ -651,6 +662,7 @@ void f2fs_destroy_extent_tree(struct inode *inode)

if (inode->i_nlink && !is_bad_inode(inode) && et->count) {
atomic_dec(>refcount);
+   atomic_dec(>total_zombie_tree);
return;
}

Hi all,
here, sbi->total_ext_tree-- should also be changed to
atomic_dec(&sbi->total_ext_tree);
Thanks,



diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index a7f6191..90fb970 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -763,6 +763,7 @@ struct f2fs_sb_info {
struct list_head extent_list;   /* lru list for shrinker */
spinlock_t extent_lock; /* locking extent lru list */
atomic_t total_ext_tree;/* extent tree count */
+   atomic_t total_zombie_tree; /* extent zombie tree count */
atomic_t total_ext_node;/* extent info count */

/* basic filesystem units */
diff --git a/fs/f2fs/shrinker.c b/fs/f2fs/shrinker.c
index a11e099..93606f2 100644
--- a/fs/f2fs/shrinker.c
+++ b/fs/f2fs/shrinker.c
@@ -32,7 +32,7 @@ static unsigned long __count_free_nids(struct f2fs_sb_info 
*sbi)

  static unsigned long __count_extent_cache(struct f2fs_sb_info *sbi)
  {
-   return atomic_read(>total_ext_tree) +
+   return atomic_read(>total_zombie_tree) +
atomic_read(>total_ext_node);
  }

--
2.5.4 (Apple Git-61)


--
___
Linux-f2fs-devel mailing list
linux-f2fs-de...@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel



--
___
Linux-f2fs-devel mailing list
linux-f2fs-de...@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel

.



--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [f2fs-dev] [PATCH 2/2] f2fs: speed up shrinking extent tree entries

2015-12-22 Thread He YunLei

On 2015/12/22 13:20, Chao Yu wrote:

Hi Jaegeuk,

We should update total_zombie_tree whenever we remove an unreferenced
extent tree during shrinking:
- f2fs_shrink_extent_tree
if (!atomic_read(&et->refcount)) {
...
atomic_dec(&sbi->total_ext_tree);
atomic_dec(&sbi->total_zombie_tree);
...
}

Other parts look good to me. :)

Reviewed-by: Chao Yu 

Thanks,


-Original Message-
From: Jaegeuk Kim [mailto:jaeg...@kernel.org]
Sent: Tuesday, December 22, 2015 11:39 AM
To: linux-kernel@vger.kernel.org; linux-fsde...@vger.kernel.org;
linux-f2fs-de...@lists.sourceforge.net
Cc: Jaegeuk Kim
Subject: [f2fs-dev] [PATCH 2/2] f2fs: speed up shrinking extent tree entries

If there are no candidates for shrinking slab entries, we don't need to traverse
any trees at all.

Signed-off-by: Jaegeuk Kim 
---
  fs/f2fs/extent_cache.c | 12 
  fs/f2fs/f2fs.h |  1 +
  fs/f2fs/shrinker.c |  2 +-
  3 files changed, 14 insertions(+), 1 deletion(-)

diff --git a/fs/f2fs/extent_cache.c b/fs/f2fs/extent_cache.c
index 0e97d6af..32693af 100644
--- a/fs/f2fs/extent_cache.c
+++ b/fs/f2fs/extent_cache.c
@@ -71,6 +71,8 @@ static struct extent_tree *__grab_extent_tree(struct inode 
*inode)
atomic_set(>refcount, 0);
et->count = 0;
atomic_inc(>total_ext_tree);
+   } else {
+   atomic_dec(>total_zombie_tree);
}
atomic_inc(>refcount);
up_write(>extent_tree_lock);
@@ -547,10 +549,14 @@ unsigned int f2fs_shrink_extent_tree(struct f2fs_sb_info 
*sbi, int
nr_shrink)
unsigned int found;
unsigned int node_cnt = 0, tree_cnt = 0;
int remained;
+   bool do_free = false;

if (!test_opt(sbi, EXTENT_CACHE))
return 0;

+   if (!atomic_read(>total_zombie_tree))
+   goto free_node;
+
if (!down_write_trylock(>extent_tree_lock))
goto out;

@@ -580,6 +586,7 @@ unsigned int f2fs_shrink_extent_tree(struct f2fs_sb_info 
*sbi, int
nr_shrink)
}
up_write(>extent_tree_lock);

+free_node:
/* 2. remove LRU extent entries */
if (!down_write_trylock(>extent_tree_lock))
goto out;
@@ -591,9 +598,13 @@ unsigned int f2fs_shrink_extent_tree(struct f2fs_sb_info 
*sbi, int
nr_shrink)
if (!remained--)
break;
list_del_init(>list);
+   do_free = true;
}
spin_unlock(>extent_lock);

+   if (do_free == false)
+   goto unlock_out;
+
/*
 * reset ino for searching victims from beginning of global extent tree.
 */
@@ -651,6 +662,7 @@ void f2fs_destroy_extent_tree(struct inode *inode)

if (inode->i_nlink && !is_bad_inode(inode) && et->count) {
atomic_dec(>refcount);
+   atomic_dec(>total_zombie_tree);
return;
}

Hi all,
here, sbi->total_ext_tree-- should also be changed to
atomic_dec(&sbi->total_ext_tree);
Thanks,



diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index a7f6191..90fb970 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -763,6 +763,7 @@ struct f2fs_sb_info {
struct list_head extent_list;   /* lru list for shrinker */
spinlock_t extent_lock; /* locking extent lru list */
atomic_t total_ext_tree;/* extent tree count */
+   atomic_t total_zombie_tree; /* extent zombie tree count */
atomic_t total_ext_node;/* extent info count */

/* basic filesystem units */
diff --git a/fs/f2fs/shrinker.c b/fs/f2fs/shrinker.c
index a11e099..93606f2 100644
--- a/fs/f2fs/shrinker.c
+++ b/fs/f2fs/shrinker.c
@@ -32,7 +32,7 @@ static unsigned long __count_free_nids(struct f2fs_sb_info 
*sbi)

  static unsigned long __count_extent_cache(struct f2fs_sb_info *sbi)
  {
-   return atomic_read(>total_ext_tree) +
+   return atomic_read(>total_zombie_tree) +
atomic_read(>total_ext_node);
  }

--
2.5.4 (Apple Git-61)


--
___
Linux-f2fs-devel mailing list
linux-f2fs-de...@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel



--
___
Linux-f2fs-devel mailing list
linux-f2fs-de...@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel

.



--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [f2fs-dev] [PATCH 2/3] f2fs: add a tracepoint for background gc

2015-10-10 Thread He YunLei

On 2015/10/6 6:43, Jaegeuk Kim wrote:

This patch introduces a tracepoint to monitor background gc behaviors.

Signed-off-by: Jaegeuk Kim 
---
  fs/f2fs/gc.c|  3 +++
  include/trace/events/f2fs.h | 28 
  2 files changed, 31 insertions(+)

diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
index e627c19..e7cec86 100644
--- a/fs/f2fs/gc.c
+++ b/fs/f2fs/gc.c
@@ -77,6 +77,9 @@ static int gc_thread_func(void *data)

stat_inc_bggc_count(sbi);

+   trace_f2fs_background_gc(sbi->sb, wait_ms,
+   prefree_segments(sbi), free_segments(sbi));
+
/* if return value is not zero, no victim was selected */
if (f2fs_gc(sbi, test_opt(sbi, FORCE_FG_GC)))
wait_ms = gc_th->no_gc_sleep_time;
diff --git a/include/trace/events/f2fs.h b/include/trace/events/f2fs.h
index 6aa63d9..7de751d 100644
--- a/include/trace/events/f2fs.h
+++ b/include/trace/events/f2fs.h
@@ -514,6 +514,34 @@ TRACE_EVENT(f2fs_map_blocks,
__entry->ret)
  );

+TRACE_EVENT(f2fs_background_gc,
+
+   TP_PROTO(struct super_block *sb, long wait_ms,
+   unsigned int prefree, unsigned int free),
+
+   TP_ARGS(sb, wait_ms, prefree, free),
+
+   TP_STRUCT__entry(
+   __field(dev_t,  dev)
+   __field(long,   wait_ms)
+   __field(unsigned int,   prefree)
+   __field(unsigned int,   free)
+   ),
+
+   TP_fast_assign(
+   __entry->dev = sb->s_dev;
+   __entry->wait_ms = wait_ms;
+   __entry->prefree = prefree;
+   __entry->free= free;
+   ),
+
+   TP_printk("dev = (%d,%d), wait_ms = %ld, prefree = %u, free = %u",
+   show_dev(__entry),
+   __entry->wait_ms,
+   __entry->prefree,
+   __entry->free)
+);
+

Hi,
I enabled the f2fs_background_gc and f2fs_get_victim tracepoints. At first, the
partition has no dirty segments, so no victim is
selected and wait_ms is equal to 300s.

f2fs_gc-8:33-2827  [001]   5014.608396: f2fs_background_gc: dev = 
(8,33), wait_ms = 30, prefree = 0, free = 73
f2fs_gc-8:33-2827  [000]   5314.793436: f2fs_background_gc: dev = 
(8,33), wait_ms = 30, prefree = 0, free = 73

Then I made the partition satisfy the condition of
has_enough_invalid_blocks, so gc can get a victim and do garbage collection, and
wait_ms = 30s.

f2fs_gc-8:33-2827  [001]   5614.978486: f2fs_background_gc: dev = 
(8,33), wait_ms = 3, prefree = 0, free = 54
f2fs_gc-8:33-2827  [001]   5614.978538: f2fs_get_victim: dev = (8,33), 
type = No TYPE, policy = (Background GC, LFS-mode, Cost-Benefit), victim = 441 
ofs_unit = 1, pre_victim_secno = -1, prefree = 0, free = 54
f2fs_gc-8:33-2827  [000]   5644.996989: f2fs_background_gc: dev = 
(8,33), wait_ms = 3, prefree = 0, free = 54
f2fs_gc-8:33-2827  [000]   5644.997027: f2fs_get_victim: dev = (8,33), 
type = No TYPE, policy = (Background GC, LFS-mode, Cost-Benefit), victim = 97 
ofs_unit = 1, pre_victim_secno = -1, prefree = 0, free = 54

When there are no dirty segments left, I think wait_ms should go back to 300s.

f2fs_gc-8:33-2827  [000]   6305.596205: f2fs_background_gc: dev = 
(8,33), wait_ms = 3, prefree = 1, free = 64
f2fs_gc-8:33-2827  [001]   6605.781281: f2fs_background_gc: dev = 
(8,33), wait_ms = 3, prefree = 0, free = 65
f2fs_gc-8:33-2827  [000]   6905.966301: f2fs_background_gc: dev = 
(8,33), wait_ms = 3, prefree = 0, free = 65
f2fs_gc-8:33-2827  [001]   7206.151344: f2fs_background_gc: dev = 
(8,33), wait_ms = 3, prefree = 0, free = 65

Here, background gc triggers every 300s, but wait_ms is still 30s. I don't
know why.

Best wishes,
Thanks

  TRACE_EVENT(f2fs_get_victim,

TP_PROTO(struct super_block *sb, int type, int gc_type,



--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [f2fs-dev] [PATCH 2/3] f2fs: add a tracepoint for background gc

2015-10-10 Thread He YunLei

On 2015/10/6 6:43, Jaegeuk Kim wrote:

This patch introduces a tracepoint to monitor background gc behaviors.

Signed-off-by: Jaegeuk Kim 
---
  fs/f2fs/gc.c|  3 +++
  include/trace/events/f2fs.h | 28 
  2 files changed, 31 insertions(+)

diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
index e627c19..e7cec86 100644
--- a/fs/f2fs/gc.c
+++ b/fs/f2fs/gc.c
@@ -77,6 +77,9 @@ static int gc_thread_func(void *data)

stat_inc_bggc_count(sbi);

+   trace_f2fs_background_gc(sbi->sb, wait_ms,
+   prefree_segments(sbi), free_segments(sbi));
+
/* if return value is not zero, no victim was selected */
if (f2fs_gc(sbi, test_opt(sbi, FORCE_FG_GC)))
wait_ms = gc_th->no_gc_sleep_time;
diff --git a/include/trace/events/f2fs.h b/include/trace/events/f2fs.h
index 6aa63d9..7de751d 100644
--- a/include/trace/events/f2fs.h
+++ b/include/trace/events/f2fs.h
@@ -514,6 +514,34 @@ TRACE_EVENT(f2fs_map_blocks,
__entry->ret)
  );

+TRACE_EVENT(f2fs_background_gc,
+
+   TP_PROTO(struct super_block *sb, long wait_ms,
+   unsigned int prefree, unsigned int free),
+
+   TP_ARGS(sb, wait_ms, prefree, free),
+
+   TP_STRUCT__entry(
+   __field(dev_t,  dev)
+   __field(long,   wait_ms)
+   __field(unsigned int,   prefree)
+   __field(unsigned int,   free)
+   ),
+
+   TP_fast_assign(
+   __entry->dev = sb->s_dev;
+   __entry->wait_ms = wait_ms;
+   __entry->prefree = prefree;
+   __entry->free= free;
+   ),
+
+   TP_printk("dev = (%d,%d), wait_ms = %ld, prefree = %u, free = %u",
+   show_dev(__entry),
+   __entry->wait_ms,
+   __entry->prefree,
+   __entry->free)
+);
+

Hi,
I enabled the f2fs_background_gc and f2fs_get_victim tracepoints. At first, the
partition has no dirty segments, so no victim is
selected and wait_ms is equal to 300s.

f2fs_gc-8:33-2827  [001]   5014.608396: f2fs_background_gc: dev = 
(8,33), wait_ms = 30, prefree = 0, free = 73
f2fs_gc-8:33-2827  [000]   5314.793436: f2fs_background_gc: dev = 
(8,33), wait_ms = 30, prefree = 0, free = 73

Then I made the partition satisfy the condition of
has_enough_invalid_blocks, so gc can get a victim and do garbage collection, and
wait_ms = 30s.

f2fs_gc-8:33-2827  [001]   5614.978486: f2fs_background_gc: dev = 
(8,33), wait_ms = 3, prefree = 0, free = 54
f2fs_gc-8:33-2827  [001]   5614.978538: f2fs_get_victim: dev = (8,33), 
type = No TYPE, policy = (Background GC, LFS-mode, Cost-Benefit), victim = 441 
ofs_unit = 1, pre_victim_secno = -1, prefree = 0, free = 54
f2fs_gc-8:33-2827  [000]   5644.996989: f2fs_background_gc: dev = 
(8,33), wait_ms = 3, prefree = 0, free = 54
f2fs_gc-8:33-2827  [000]   5644.997027: f2fs_get_victim: dev = (8,33), 
type = No TYPE, policy = (Background GC, LFS-mode, Cost-Benefit), victim = 97 
ofs_unit = 1, pre_victim_secno = -1, prefree = 0, free = 54

When there are no dirty segments left, I think wait_ms should go back to 300s.

f2fs_gc-8:33-2827  [000]   6305.596205: f2fs_background_gc: dev = 
(8,33), wait_ms = 30000, prefree = 1, free = 64
f2fs_gc-8:33-2827  [001]   6605.781281: f2fs_background_gc: dev = 
(8,33), wait_ms = 30000, prefree = 0, free = 65
f2fs_gc-8:33-2827  [000]   6905.966301: f2fs_background_gc: dev = 
(8,33), wait_ms = 30000, prefree = 0, free = 65
f2fs_gc-8:33-2827  [001]   7206.151344: f2fs_background_gc: dev = 
(8,33), wait_ms = 30000, prefree = 0, free = 65

Here, background gc triggers every 300s, but the reported wait_ms is still 30s. 
I don't know why.

Best wishes,
Thanks

  TRACE_EVENT(f2fs_get_victim,

TP_PROTO(struct super_block *sb, int type, int gc_type,



--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [f2fs-dev] [PATCH 5/7] f2fs: enhance multithread dio write performance

2015-09-16 Thread He YunLei

On 2015/9/16 18:15, Chao Yu wrote:

Hi Jaegeuk,


-Original Message-
From: Jaegeuk Kim [mailto:jaeg...@kernel.org]
Sent: Wednesday, September 16, 2015 5:21 AM
To: Chao Yu
Cc: linux-f2fs-de...@lists.sourceforge.net; linux-kernel@vger.kernel.org
Subject: Re: [PATCH 5/7] f2fs: enhance multithread dio write performance

Hi Chao,

On Fri, Sep 11, 2015 at 02:41:53PM +0800, Chao Yu wrote:

When dio writes are performed concurrently, our performance will be low because
Thread A's allocation of multiple contiguous blocks will be broken up by Thread B.
There are two cases, as below:
  - In Thread B, we may change the current segment to a new segment for LFS
allocation if we dio write at the beginning of the file.
  - In Thread B, we may allocate blocks in the middle of Thread A's
allocation, which makes the blocks allocated in Thread A
discontinuous.

This patch adds writepages mutex lock to make block allocation in dio write
atomic to avoid above issues.

Test environment:
ubuntu os with linux kernel 4.2+, intel i7-3770, 16g memory,
32g kingston sd card.

fio --name seqw --ioengine=sync --invalidate=1 --rw=write --directory=/mnt/f2fs

--filesize=256m --size=16m --bs=2m --direct=1

--numjobs=10

before:
   WRITE: io=163840KB, aggrb=3145KB/s, minb=314KB/s, maxb=411KB/s, 
mint=39836msec,

maxt=52083msec


patched:
   WRITE: io=163840KB, aggrb=10033KB/s, minb=1003KB/s, maxb=1124KB/s, 
mint=14565msec,

maxt=16329msec


Signed-off-by: Chao Yu 
---
  fs/f2fs/data.c | 13 ++---
  1 file changed, 10 insertions(+), 3 deletions(-)

diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index a737ca5..a0a5849 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -1536,7 +1536,9 @@ static ssize_t f2fs_direct_IO(struct kiocb *iocb, struct 
iov_iter *iter,
struct file *file = iocb->ki_filp;
struct address_space *mapping = file->f_mapping;
struct inode *inode = mapping->host;
+   struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
size_t count = iov_iter_count(iter);
+   int rw = iov_iter_rw(iter);
int err;

/* we don't need to use inline_data strictly */
@@ -1555,12 +1557,17 @@ static ssize_t f2fs_direct_IO(struct kiocb *iocb, 
struct iov_iter

*iter,


trace_f2fs_direct_IO_enter(inode, offset, count, iov_iter_rw(iter));

-   if (iov_iter_rw(iter) == WRITE)
+   if (rw == WRITE) {
+   mutex_lock(&sbi->writepages);


Why do we have to share sbi->writepages?


The root cause of this issue is that: in f2fs, we have no suitable
dispatcher which can do the following things as an atomic operation:
a) allocate position(s) in flash device for current block(s);
b) submit user data in allocated position(s) in block layer.

Without the dispatcher, we will suffer performance issue in following
scenario:
Thread A                Thread B                Thread C
allocate pos+1
                        allocate pos+2
                                                allocate pos+3
submit pos+1
                                                submit pos+3
                        submit pos+2

Our final submitting series will: pos+1, pos+3, pos+2, this makes f2fs
running into non-LFS mode, therefore resulting in bad performance.

The writepages mutex lock supplies us with a good solution for the above issue.
It not only makes the allocating and submitting pair execute atomically,
but also reduces the fragmentation of a file, since we submit blocks
belonging to a single inode as contiguously as possible.

So here I choose to use writepages mutex lock to fix the performance
issue caused by both dio write vs dio write and dio write vs buffered
write.

If I'm missing something, please correct me.




__allocate_data_blocks(inode, offset, count);


If the problem lies on the misaligned blocks, how about calling mutex_unlock
here?


When changing to unlock here, I got regression when testing with following 
command:
fio --name seqw --ioengine=sync --invalidate=1 --rw=write --directory=/mnt/f2fs 
--filesize=256m --size=4m --bs=64k --direct=1
--numjobs=20

unlock here:
   WRITE: io=81920KB, aggrb=5802KB/s, minb=290KB/s, maxb=292KB/s, 
mint=14010msec, maxt=14119msec
unlock after dio finished:
   WRITE: io=81920KB, aggrb=6088KB/s, minb=304KB/s, maxb=1081KB/s, 
mint=3786msec, maxt=13454msec

So how about keep it in original place in this patch?


Does sharing the writepages mutex lock have an effect on cached writes? Here is 
the AndroBench result on my phone:

Before patch:
1R1W   8R8W   16R16W
Sequential Write   161.31 163.85  154.67
Random  Write   9.48   17.66   18.09


After patch:
1R1W   8R8W   16R16W
Sequential Write   159.61 157.24  160.11
Random  Write   9.17   8.51   8.8

Unit:Mb/s, File size: 64M, Buffer size: 4k



Thanks,


Re: [f2fs-dev] [PATCH 5/7] f2fs: enhance multithread dio write performance

2015-09-16 Thread He YunLei

On 2015/9/16 18:15, Chao Yu wrote:

Hi Jaegeuk,


-Original Message-
From: Jaegeuk Kim [mailto:jaeg...@kernel.org]
Sent: Wednesday, September 16, 2015 5:21 AM
To: Chao Yu
Cc: linux-f2fs-de...@lists.sourceforge.net; linux-kernel@vger.kernel.org
Subject: Re: [PATCH 5/7] f2fs: enhance multithread dio write performance

Hi Chao,

On Fri, Sep 11, 2015 at 02:41:53PM +0800, Chao Yu wrote:

When dio writes are performed concurrently, our performance will be low because
Thread A's allocation of multiple contiguous blocks will be broken up by Thread B.
There are two cases, as below:
  - In Thread B, we may change the current segment to a new segment for LFS
allocation if we dio write at the beginning of the file.
  - In Thread B, we may allocate blocks in the middle of Thread A's
allocation, which makes the blocks allocated in Thread A
discontinuous.

This patch adds writepages mutex lock to make block allocation in dio write
atomic to avoid above issues.

Test environment:
ubuntu os with linux kernel 4.2+, intel i7-3770, 16g memory,
32g kingston sd card.

fio --name seqw --ioengine=sync --invalidate=1 --rw=write --directory=/mnt/f2fs

--filesize=256m --size=16m --bs=2m --direct=1

--numjobs=10

before:
   WRITE: io=163840KB, aggrb=3145KB/s, minb=314KB/s, maxb=411KB/s, 
mint=39836msec,

maxt=52083msec


patched:
   WRITE: io=163840KB, aggrb=10033KB/s, minb=1003KB/s, maxb=1124KB/s, 
mint=14565msec,

maxt=16329msec


Signed-off-by: Chao Yu 
---
  fs/f2fs/data.c | 13 ++---
  1 file changed, 10 insertions(+), 3 deletions(-)

diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index a737ca5..a0a5849 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -1536,7 +1536,9 @@ static ssize_t f2fs_direct_IO(struct kiocb *iocb, struct 
iov_iter *iter,
struct file *file = iocb->ki_filp;
struct address_space *mapping = file->f_mapping;
struct inode *inode = mapping->host;
+   struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
size_t count = iov_iter_count(iter);
+   int rw = iov_iter_rw(iter);
int err;

/* we don't need to use inline_data strictly */
@@ -1555,12 +1557,17 @@ static ssize_t f2fs_direct_IO(struct kiocb *iocb, 
struct iov_iter

*iter,


trace_f2fs_direct_IO_enter(inode, offset, count, iov_iter_rw(iter));

-   if (iov_iter_rw(iter) == WRITE)
+   if (rw == WRITE) {
+   mutex_lock(&sbi->writepages);


Why do we have to share sbi->writepages?


The root cause of this issue is that: in f2fs, we have no suitable
dispatcher which can do the following things as an atomic operation:
a) allocate position(s) in flash device for current block(s);
b) submit user data in allocated position(s) in block layer.

Without the dispatcher, we will suffer performance issue in following
scenario:
Thread A                Thread B                Thread C
allocate pos+1
                        allocate pos+2
                                                allocate pos+3
submit pos+1
                                                submit pos+3
                        submit pos+2

Our final submitting series will: pos+1, pos+3, pos+2, this makes f2fs
running into non-LFS mode, therefore resulting in bad performance.

The writepages mutex lock supplies us with a good solution for the above issue.
It not only makes the allocating and submitting pair execute atomically,
but also reduces the fragmentation of a file, since we submit blocks
belonging to a single inode as contiguously as possible.

So here I choose to use writepages mutex lock to fix the performance
issue caused by both dio write vs dio write and dio write vs buffered
write.

If I'm missing something, please correct me.




__allocate_data_blocks(inode, offset, count);


If the problem lies on the misaligned blocks, how about calling mutex_unlock
here?


When changing to unlock here, I got regression when testing with following 
command:
fio --name seqw --ioengine=sync --invalidate=1 --rw=write --directory=/mnt/f2fs 
--filesize=256m --size=4m --bs=64k --direct=1
--numjobs=20

unlock here:
   WRITE: io=81920KB, aggrb=5802KB/s, minb=290KB/s, maxb=292KB/s, 
mint=14010msec, maxt=14119msec
unlock after dio finished:
   WRITE: io=81920KB, aggrb=6088KB/s, minb=304KB/s, maxb=1081KB/s, 
mint=3786msec, maxt=13454msec

So how about keep it in original place in this patch?


Does sharing the writepages mutex lock have an effect on cached writes? Here is 
the AndroBench result on my phone:

Before patch:
1R1W   8R8W   16R16W
Sequential Write   161.31 163.85  154.67
Random  Write   9.48   17.66   18.09


After patch:
1R1W   8R8W   16R16W
Sequential Write   159.61 157.24  160.11
Random  Write   9.17   8.51   8.8

Unit:Mb/s, File size: 64M, 

Re: [RFC PATCH] pinctrl: pinctrl-single.c: init pinctrl single at arch_initcall time

2014-10-11 Thread He YunLei

On 2014/10/9 2:10, Tony Lindgren wrote:

* He YunLei  [141007 18:43]:


Thanks for your review; I really appreciate it. However, on our ARM
platform we don't have custom initcall levels for other drivers. Although
deferred probe helps other drivers to register successfully, we are still
confused by the many pin request error messages in the debug output while
booting the kernel. Besides, if the number of such cases is bigger than the
limited number, can deferred probe still solve this problem?


OK. Care to provide some examples where this happens on your
platform?

Note that we already have pinctrl very early in drivers/Makefile.
What are the early users for pinctrl-single in your setup?

Regards,

Tony

.

On our platform we use subsys_initcall for I2C and fs_initcall for the PMIC; 
both of them run earlier than pinctrl-single. Although they register successfully
with the aid of deferred probe, it really confuses us that the pin 
requests are deferred. Why can't we set up pinctrl-single earlier to reduce 
these messages?


Regards

YunLei

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [RFC PATCH] pinctrl: pinctrl-single.c: init pinctrl single at arch_initcall time

2014-10-11 Thread He YunLei

On 2014/10/9 2:10, Tony Lindgren wrote:

* He YunLei heyun...@huawei.com [141007 18:43]:


Thanks for your review; I really appreciate it. However, on our ARM
platform we don't have custom initcall levels for other drivers. Although
deferred probe helps other drivers to register successfully, we are still
confused by the many pin request error messages in the debug output while
booting the kernel. Besides, if the number of such cases is bigger than the
limited number, can deferred probe still solve this problem?


OK. Care to provide some examples where this happens on your
platform?

Note that we already have pinctrl very early in drivers/Makefile.
What are the early users for pinctrl-single in your setup?

Regards,

Tony

.

On our platform we use subsys_initcall for I2C and fs_initcall for the PMIC; 
both of them run earlier than pinctrl-single. Although they register successfully
with the aid of deferred probe, it really confuses us that the pin 
requests are deferred. Why can't we set up pinctrl-single earlier to reduce 
these messages?


Regards

YunLei

--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [RFC PATCH] pinctrl: pinctrl-single.c: init pinctrl single at arch_initcall time

2014-10-07 Thread He YunLei

On 2014/9/30 1:54, Tony Lindgren wrote:

* He YunLei  [140929 03:32]:

On our arm platform, some modules (e.g. I2C bus driver) will use the
pinctrl-single driver to configure the SoC pin, but pinctrl-single driver
uses module_init time, that makes some modules initialize ahead the
pinctrl-single and fail to register.

This patch promotes the initialization priority of pinctrl-single from
module_init time to arch_initcall time.


This has come up earlier and so far in all cases the problem
is that you have custom initcall levels for your other drivers.

Get rid of custom initcall levels for your drivers and the
problem goes away. There's no need to init the drivers earlier
nowadays. If you have other dependencies then deferred probe
helps but should be only needed for a limited number of cases.

We want to initialize things later, not earlier in general. That
removes the issues of no proper debug output while booting the
kernel.

Regards,

Tony



Signed-off-by: Yunlei He 
Signed-off-by: Xinwei Kong 
---
  drivers/pinctrl/pinctrl-single.c |   13 -
  1 file changed, 12 insertions(+), 1 deletion(-)

diff --git a/drivers/pinctrl/pinctrl-single.c
b/drivers/pinctrl/pinctrl-single.c
index 95dd9cf..4b9e5b9 100644
--- a/drivers/pinctrl/pinctrl-single.c
+++ b/drivers/pinctrl/pinctrl-single.c
@@ -2012,7 +2012,18 @@ static struct platform_driver pcs_driver = {
  #endif
  };

-module_platform_driver(pcs_driver);
+static int __init pinctrl_single_init(void)
+{
+   return platform_driver_register(&pcs_driver);
+}
+
+static void __exit pinctrl_single_exit(void)
+{
+   platform_driver_unregister(&pcs_driver);
+}
+
+arch_initcall(pinctrl_single_init);
+module_exit(pinctrl_single_exit);

  MODULE_AUTHOR("Tony Lindgren ");
  MODULE_DESCRIPTION("One-register-per-pin type device tree based pinctrl
driver");
--
1.7.9.5



.



Thanks for your review; I really appreciate it. However, on our ARM 
platform we don't have custom initcall levels for other drivers. Although 
deferred probe helps other drivers to register successfully, we are still 
confused by the many pin request error messages in the debug output while 
booting the kernel. Besides, if the number of such cases is bigger than the 
limited number, can deferred probe still solve this problem?


Regards,
He YunLei

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [RFC PATCH] pinctrl: pinctrl-single.c: init pinctrl single at arch_initcall time

2014-10-07 Thread He YunLei

On 2014/9/30 1:54, Tony Lindgren wrote:

* He YunLei heyun...@huawei.com [140929 03:32]:

On our arm platform, some modules (e.g. I2C bus driver) will use the
pinctrl-single driver to configure the SoC pin, but pinctrl-single driver
uses module_init time, that makes some modules initialize ahead the
pinctrl-single and fail to register.

This patch promotes the initialization priority of pinctrl-single from
module_init time to arch_initcall time.


This has come up earlier and so far in all cases the problem
is that you have custom initcall levels for your other drivers.

Get rid of custom initcall levels for your drivers and the
problem goes away. There's no need to init the drivers earlier
nowadays. If you have other dependencies then deferred probe
helps but should be only needed for a limited number of cases.

We want to initialize things later, not earlier in general. That
removes the issues of no proper debug output while booting the
kernel.

Regards,

Tony



Signed-off-by: Yunlei He heyun...@huawei.com
Signed-off-by: Xinwei Kong kong.kongxin...@hisilicon.com
---
  drivers/pinctrl/pinctrl-single.c |   13 -
  1 file changed, 12 insertions(+), 1 deletion(-)

diff --git a/drivers/pinctrl/pinctrl-single.c
b/drivers/pinctrl/pinctrl-single.c
index 95dd9cf..4b9e5b9 100644
--- a/drivers/pinctrl/pinctrl-single.c
+++ b/drivers/pinctrl/pinctrl-single.c
@@ -2012,7 +2012,18 @@ static struct platform_driver pcs_driver = {
  #endif
  };

-module_platform_driver(pcs_driver);
+static int __init pinctrl_single_init(void)
+{
+   return platform_driver_register(&pcs_driver);
+}
+
+static void __exit pinctrl_single_exit(void)
+{
+   platform_driver_unregister(&pcs_driver);
+}
+
+arch_initcall(pinctrl_single_init);
+module_exit(pinctrl_single_exit);

  MODULE_AUTHOR("Tony Lindgren <t...@atomide.com>");
  MODULE_DESCRIPTION("One-register-per-pin type device tree based pinctrl
driver");
--
1.7.9.5



.



Thanks for your review; I really appreciate it. However, on our ARM 
platform we don't have custom initcall levels for other drivers. Although 
deferred probe helps other drivers to register successfully, we are still 
confused by the many pin request error messages in the debug output while 
booting the kernel. Besides, if the number of such cases is bigger than the 
limited number, can deferred probe still solve this problem?


Regards,
He YunLei

--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[RFC PATCH] pinctrl: pinctrl-single.c: init pinctrl single at arch_initcall time

2014-09-29 Thread He YunLei
On our ARM platform, some modules (e.g. the I2C bus driver) use the 
pinctrl-single driver to configure the SoC pins, but the pinctrl-single 
driver is initialized at module_init time, which makes some modules 
initialize ahead of pinctrl-single and fail to register.


This patch promotes the initialization priority of pinctrl-single from 
module_init time to arch_initcall time.



Signed-off-by: Yunlei He 
Signed-off-by: Xinwei Kong 
---
 drivers/pinctrl/pinctrl-single.c |   13 -
 1 file changed, 12 insertions(+), 1 deletion(-)

diff --git a/drivers/pinctrl/pinctrl-single.c 
b/drivers/pinctrl/pinctrl-single.c

index 95dd9cf..4b9e5b9 100644
--- a/drivers/pinctrl/pinctrl-single.c
+++ b/drivers/pinctrl/pinctrl-single.c
@@ -2012,7 +2012,18 @@ static struct platform_driver pcs_driver = {
 #endif
 };

-module_platform_driver(pcs_driver);
+static int __init pinctrl_single_init(void)
+{
+   return platform_driver_register(&pcs_driver);
+}
+
+static void __exit pinctrl_single_exit(void)
+{
+   platform_driver_unregister(&pcs_driver);
+}
+
+arch_initcall(pinctrl_single_init);
+module_exit(pinctrl_single_exit);

 MODULE_AUTHOR("Tony Lindgren ");
 MODULE_DESCRIPTION("One-register-per-pin type device tree based 
pinctrl driver");

--
1.7.9.5

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[RFC PATCH] pinctrl: pinctrl-single.c: init pinctrl single at arch_initcall time

2014-09-29 Thread He YunLei
On our ARM platform, some modules (e.g. the I2C bus driver) use the 
pinctrl-single driver to configure the SoC pins, but the pinctrl-single 
driver is initialized at module_init time, which makes some modules 
initialize ahead of pinctrl-single and fail to register.


This patch promotes the initialization priority of pinctrl-single from 
module_init time to arch_initcall time.



Signed-off-by: Yunlei He heyun...@huawei.com
Signed-off-by: Xinwei Kong kong.kongxin...@hisilicon.com
---
 drivers/pinctrl/pinctrl-single.c |   13 -
 1 file changed, 12 insertions(+), 1 deletion(-)

diff --git a/drivers/pinctrl/pinctrl-single.c 
b/drivers/pinctrl/pinctrl-single.c

index 95dd9cf..4b9e5b9 100644
--- a/drivers/pinctrl/pinctrl-single.c
+++ b/drivers/pinctrl/pinctrl-single.c
@@ -2012,7 +2012,18 @@ static struct platform_driver pcs_driver = {
 #endif
 };

-module_platform_driver(pcs_driver);
+static int __init pinctrl_single_init(void)
+{
+   return platform_driver_register(&pcs_driver);
+}
+
+static void __exit pinctrl_single_exit(void)
+{
+   platform_driver_unregister(&pcs_driver);
+}
+
+arch_initcall(pinctrl_single_init);
+module_exit(pinctrl_single_exit);

 MODULE_AUTHOR("Tony Lindgren <t...@atomide.com>");
 MODULE_DESCRIPTION("One-register-per-pin type device tree based 
pinctrl driver");

--
1.7.9.5

--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/