[PATCH] Btrfs-progs: introduce '-p' option and into subvolume set-default command

2012-09-23 Thread Chen Yang
In command "btrfs subvolume set-default", we used subvolume  and 
to set the default subvolume of a filesystem. It's not easy for a common
user, so I improved it and the  of a subvolume can be used to
set the default subvolume of a filesystem.

Signed-off-by: Cheng Yang 
---
 cmds-subvolume.c |   89 ++
 man/btrfs.8.in   |6 ++--
 2 files changed, 79 insertions(+), 16 deletions(-)

diff --git a/cmds-subvolume.c b/cmds-subvolume.c
index 8399e72..827234c 100644
--- a/cmds-subvolume.c
+++ b/cmds-subvolume.c
@@ -26,6 +26,7 @@
 #include 
 
 #include "kerncompat.h"
+#include "ctree.h"
 #include "ioctl.h"
 #include "qgroup.h"
 
@@ -601,23 +602,66 @@ static int cmd_subvol_get_default(int argc, char **argv)
 }
 
 static const char * const cmd_subvol_set_default_usage[] = {
-   "btrfs subvolume set-default  ",
+   "btrfs subvolume set-default [-p] [] ",
"Set the default subvolume of a filesystem",
+   "-pSet the parent tree(subvolume) of the PATH",
+   "  as the default subvolume, if PATH is not a subvolume",
NULL
 };
 
 static int cmd_subvol_set_default(int argc, char **argv)
 {
-   int ret=0, fd, e;
-   u64 objectid;
+   int ret = 0, fd = -1, e;
+   int parent = 0;
+   u64 objectid = -1;
char*path;
-   char*subvolid;
+   char*subvolid, *inv;
 
-   if (check_argc_exact(argc, 3))
+   optind = 1;
+   while (1) {
+   int c = getopt(argc, argv, "p");
+   if (c < 0)
+   break;
+
+   switch (c) {
+   case 'p':
+   parent = 1;
+   break;
+   default:
+   usage(cmd_subvol_set_default_usage);
+   }
+   }
+
+   if (check_argc_min(argc - optind, 1) ||
+   check_argc_max(argc - optind, 2))
usage(cmd_subvol_set_default_usage);
 
-   subvolid = argv[1];
-   path = argv[2];
+   if (argc - optind == 2) {
+   subvolid = argv[optind];
+   path = argv[optind + 1];
+
+   objectid = (unsigned long long)strtoll(subvolid, &inv, 0);
+   if (errno == ERANGE || subvolid == inv) {
+   fprintf(stderr,
+   "ERROR: invalid tree id (%s)\n", subvolid);
+   return 30;
+   }
+   } else {
+   path = argv[optind];
+
+   ret = test_issubvolume(path);
+   if (ret < 0) {
+   fprintf(stderr,
+   "ERROR: error accessing '%s'\n", path);
+   return 12;
+   }
+   if (!ret && !parent) {
+   fprintf(stderr,
+   "ERROR: '%s' is not a subvolume\n",
+   path);
+   return 13;
+   }
+   }
 
fd = open_file_or_dir(path);
if (fd < 0) {
@@ -625,16 +669,35 @@ static int cmd_subvol_set_default(int argc, char **argv)
return 12;
}
 
-   objectid = (unsigned long long)strtoll(subvolid, NULL, 0);
-   if (errno == ERANGE) {
-   fprintf(stderr, "ERROR: invalid tree id (%s)\n",subvolid);
-   return 30;
+   /*
+ When objectid is -1, it means that
+ subvolume id is not specified by user.
+ We will set default subvolume by .
+   */
+   if (objectid == -1) {
+   struct btrfs_ioctl_ino_lookup_args args;
+
+   memset(&args, 0, sizeof(args));
+   args.treeid = 0;
+   args.objectid = BTRFS_FIRST_FREE_OBJECTID;
+
+   ret = ioctl(fd, BTRFS_IOC_INO_LOOKUP, &args);
+   if (ret) {
+   fprintf(stderr,
+   "ERROR: can't perform the search - %s\n",
+   strerror(errno));
+   return ret;
+   }
+
+   objectid = args.treeid;
}
+
ret = ioctl(fd, BTRFS_IOC_DEFAULT_SUBVOL, &objectid);
e = errno;
close(fd);
-   if( ret < 0 ){
-   fprintf(stderr, "ERROR: unable to set a new default subvolume - 
%s\n",
+   if (ret < 0) {
+   fprintf(stderr,
+   "ERROR: unable to set a new default subvolume - %s\n",
strerror(e));
return 30;
}
diff --git a/man/btrfs.8.in b/man/btrfs.8.in
index 3f7765d..2bc1d97 100644
--- a/man/btrfs.8.in
+++ b/man/btrfs.8.in
@@ -13,7 +13,7 @@ btrfs \- control a btrfs filesystem
 .PP
 \fBbtrfs\fP \fBsubvolume list\fP\fI [-pr] [-s 0|1] [-g [+|-]value] [-c 
[+|-]value] [--rootid=rootid,gen,ogen,path] \fP
 .PP
-\fBbtrfs\fP \fBsubvolume set-default\fP\fI  \fP
+\fBbtrfs\fP \fBsubvolume set-default\fP\fI [-p] [] \fP
 .PP
 \fBbtrfs\fP \fBsub

Btrfs: check range early in map_private_extent_buffer

2012-09-23 Thread Wang Sheng-Hui
Check range early to avoid further check/compute in case
of range error.

Signed-off-by: Wang Sheng-Hui 
---
 fs/btrfs/extent_io.c |   16 
 1 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 4c87847..9250cf5 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -4643,6 +4643,14 @@ int map_private_extent_buffer(struct extent_buffer *eb, 
unsigned long start,
unsigned long end_i = (start_offset + start + min_len - 1) >>
PAGE_CACHE_SHIFT;
 
+   if (start + min_len > eb->len) {
+   printk(KERN_ERR "btrfs bad mapping eb start %llu len %lu, "
+  "wanted %lu %lu\n", (unsigned long long)eb->start,
+  eb->len, start, min_len);
+   WARN_ON(1);
+   return -EINVAL;
+   }
+
if (i != end_i)
return -EINVAL;
 
@@ -4654,14 +4662,6 @@ int map_private_extent_buffer(struct extent_buffer *eb, 
unsigned long start,
*map_start = ((u64)i << PAGE_CACHE_SHIFT) - start_offset;
}
 
-   if (start + min_len > eb->len) {
-   printk(KERN_ERR "btrfs bad mapping eb start %llu len %lu, "
-  "wanted %lu %lu\n", (unsigned long long)eb->start,
-  eb->len, start, min_len);
-   WARN_ON(1);
-   return -EINVAL;
-   }
-
p = extent_buffer_page(eb, i);
kaddr = page_address(p);
*map = kaddr + offset;
-- 
1.7.1

--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [RFC][PATCH V2] Btrfs-progs, btrfsck: add block group check function

2012-09-23 Thread Chen Yang
Any comments ?

于 2012-8-8 11:06, Chen Yang 写道:
> From: Chen Yang 
> 
> This patch adds the function to check correspondence
> between block group, chunk and device extent.
> 
> Signed-off-by: Cheng Yang 
> ---
> v1->v2: optimaze the checking process:
>   * Remove the checking traversal of block group RB tree.
>   * Mark block group item which matched with chunk item.
>   * Output the unmarked block group item error infomaton.
> when releasing RB tree.
>   * Merge some relevant flows into one.
> ---
>  Makefile   |2 +-
>  btrfsck.c  |  517 
> +++-
>  dev-extent-cache.c |  188 +++
>  dev-extent-cache.h |   60 ++
>  4 files changed, 760 insertions(+), 7 deletions(-)
>  create mode 100644 dev-extent-cache.c
>  create mode 100644 dev-extent-cache.h
> 
> diff --git a/Makefile b/Makefile
> index 969..75eced8 100644
> --- a/Makefile
> +++ b/Makefile
> @@ -4,7 +4,7 @@ CFLAGS = -g -O0
>  objects = ctree.o disk-io.o radix-tree.o extent-tree.o print-tree.o \
> root-tree.o dir-item.o file-item.o inode-item.o \
> inode-map.o crc32c.o rbtree.o extent-cache.o extent_io.o \
> -   volumes.o utils.o btrfs-list.o btrfslabel.o repair.o
> +   volumes.o utils.o btrfs-list.o btrfslabel.o repair.o 
> dev-extent-cache.o
>  cmds_objects = cmds-subvolume.o cmds-filesystem.o cmds-device.o cmds-scrub.o 
> \
>  cmds-inspect.o cmds-balance.o
>  
> diff --git a/btrfsck.c b/btrfsck.c
> index 088b9f4..437aee9 100644
> --- a/btrfsck.c
> +++ b/btrfsck.c
> @@ -34,6 +34,66 @@
>  #include "list.h"
>  #include "version.h"
>  #include "utils.h"
> +#include "dev-extent-cache.h"
> +
> +#define REC_UNCHECKED0
> +#define REC_CHECKED  1
> +
> +struct block_group_record {
> + struct cache_extent cache;
> + int state;
> +
> + u64 objectid;
> + u8  type;
> + u64 offset;
> +
> + u64 flags;
> +};
> +
> +struct dev_record {
> + struct cache_extent cache;
> + int state;
> +
> + u64 objectid;
> + u8  type;
> + u64 offset;
> +
> + u64 devid;
> + u64 total_byte;
> + u64 byte_used;
> +};
> +
> +struct stripe {
> + u64 devid;
> + u64 offset;
> +};
> +
> +struct chunk_record {
> + struct cache_extent cache;
> + int state;
> +
> + u64 objectid;
> + u8  type;
> + u64 offset;
> +
> + u64 length;
> + u64 type_flags;
> + u16 num_stripes;
> + struct stripe stripes[0];
> +};
> +
> +struct dev_extent_record {
> + struct cache_dev_extent cache;
> + int state;
> +
> + u64 objectid;
> + u8  type;
> + u64 offset;
> +
> + u64 chunk_objecteid;
> + u64 chunk_offset;
> + u64 length;
> +};
>  
>  static u64 bytes_used = 0;
>  static u64 total_csum_bytes = 0;
> @@ -1852,7 +1912,7 @@ static int all_backpointers_checked(struct 
> extent_record *rec, int print_errs)
>   (unsigned long long)rec->start,
>   back->full_backref ?
>   "parent" : "root",
> - back->full_backref ? 
> + back->full_backref ?
>   (unsigned long long)dback->parent:
>   (unsigned long long)dback->root,
>   (unsigned long long)dback->owner,
> @@ -2440,6 +2500,153 @@ static int process_extent_ref_v0(struct cache_tree 
> *extent_cache,
>  }
>  #endif
>  
> +static int process_chunk_item(struct cache_tree *chunk_cache,
> + struct btrfs_key *key, struct extent_buffer *eb, int slot)
> +{
> + struct btrfs_chunk *ptr;
> + struct chunk_record *rec;
> + int num_stripes, i;
> + int ret = 0;
> +
> + ptr = btrfs_item_ptr(eb,
> + slot, struct btrfs_chunk);
> +
> + num_stripes = btrfs_chunk_num_stripes(eb, ptr);
> +
> + rec = malloc(sizeof(*rec) +
> + num_stripes * sizeof(*rec->stripes));
> + if (!rec) {
> + fprintf(stderr, "memory allocation failed\n");
> + return -ENOMEM;
> + }
> +
> + rec->cache.start = key->offset;
> + rec->cache.size = 1;
> + rec->state = REC_UNCHECKED;
> +
> + rec->objectid = key->objectid;
> + rec->type = key->type;
> + rec->offset = key->offset;
> +
> + rec->length = btrfs_chunk_length(eb, ptr);
> + rec->type = btrfs_chunk_type(eb, ptr);
> + rec->num_stripes = num_stripes;
> +
> + for (i = 0; i < rec->num_stripes; ++i) {
> + rec->stripes[i].devid =
> + btrfs_stripe_devid_nr(eb, ptr, i);
> + rec->stripes[i].offset =
> + btrfs_stripe_offset_nr(eb, ptr, i);
> + }
> +
> + ret = insert_existing_cache_extent(
> + chunk_cache, &rec->cache);
> +
> + return ret;
> +}
> +
> +static int process_dev_item(struct cache_tree *dev_

Re: [PATCH] Btrfs-progs: btrfs subvolume delete could delete subvolumes

2012-09-23 Thread Anand Jain


David,

 Thanks for the comments.

> Yes, this is useful, thanks. I'm thinking if it's ok to stop on
> first error, ie. when the subvolume does not exist or is a directory.

 I am fine with either ways. I shall just keep it as it is as of now.

-Anand
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v2] Btrfs-progs: btrfs subvolume delete could delete subvolumes

2012-09-23 Thread Anand jain
From: Anand Jain 

Signed-off-by: Anand Jain 
---
 cmds-subvolume.c |   36 
 1 files changed, 24 insertions(+), 12 deletions(-)

diff --git a/cmds-subvolume.c b/cmds-subvolume.c
index f4aa80f..f6c488e 100644
--- a/cmds-subvolume.c
+++ b/cmds-subvolume.c
@@ -188,31 +188,34 @@ int test_issubvolume(char *path)
 }
 
 static const char * const cmd_subvol_delete_usage[] = {
-   "btrfs subvolume delete ",
-   "Delete a subvolume",
+   "btrfs subvolume delete  [...]",
+   "Delete subvolume(s)",
NULL
 };
 
 static int cmd_subvol_delete(int argc, char **argv)
 {
-   int res, fd, len, e;
+   int res, fd, len, e, cnt = 1, ret = 0;
struct btrfs_ioctl_vol_args args;
char*dname, *vname, *cpath;
char*path;
 
-   if (check_argc_exact(argc, 2))
+   if (check_argc_min(argc, 2))
usage(cmd_subvol_delete_usage);
 
-   path = argv[1];
+again:
+   path = argv[cnt];
 
res = test_issubvolume(path);
if(res<0){
fprintf(stderr, "ERROR: error accessing '%s'\n", path);
-   return 12;
+   ret = 12;
+   goto out;
}
if(!res){
fprintf(stderr, "ERROR: '%s' is not a subvolume\n", path);
-   return 13;
+   ret = 13;
+   goto out;
}
 
cpath = realpath(path, 0);
@@ -226,21 +229,24 @@ static int cmd_subvol_delete(int argc, char **argv)
 strchr(vname, '/') ){
fprintf(stderr, "ERROR: incorrect subvolume name ('%s')\n",
vname);
-   return 14;
+   ret = 14;
+   goto out;
}
 
len = strlen(vname);
if (len == 0 || len >= BTRFS_VOL_NAME_MAX) {
fprintf(stderr, "ERROR: snapshot name too long ('%s)\n",
vname);
-   return 14;
+   ret = 14;
+   goto out;
}
 
fd = open_file_or_dir(dname);
if (fd < 0) {
close(fd);
fprintf(stderr, "ERROR: can't access to '%s'\n", dname);
-   return 12;
+   ret = 12;
+   goto out;
}
 
printf("Delete subvolume '%s/%s'\n", dname, vname);
@@ -254,10 +260,16 @@ static int cmd_subvol_delete(int argc, char **argv)
if(res < 0 ){
fprintf( stderr, "ERROR: cannot delete '%s/%s' - %s\n",
dname, vname, strerror(e));
-   return 11;
+   ret = 11;
+   goto out;
}
 
-   return 0;
+out:
+   cnt++;
+   if (cnt < argc)
+   goto again;
+
+   return ret;
 }
 
 static const char * const cmd_subvol_list_usage[] = {
-- 
1.7.1

--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH V3 1/2] Btrfs: cleanup duplicated division functions

2012-09-23 Thread Miao Xie
On Sun, 23 Sep 2012 14:49:24 +0300, Ilya Dryomov wrote:
>> diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
>> index 9384a2a..d8d53f7 100644
>> --- a/fs/btrfs/ioctl.c
>> +++ b/fs/btrfs/ioctl.c
>> @@ -3335,6 +3335,24 @@ static long btrfs_ioctl_balance(struct file *file, 
>> void __user *arg)
>>  
>>  goto do_balance;
>>  }
>> +
>> +if ((bargs->data.flags & BTRFS_BALANCE_ARGS_USAGE) &&
>> +(bargs->data.usage < 0 || bargs->data.usage > 100)) {
>> +ret = -EINVAL;
>> +goto out_bargs;
>> +}
>> +
>> +if ((bargs->meta.flags & BTRFS_BALANCE_ARGS_USAGE) &&
>> +(bargs->meta.usage < 0 || bargs->meta.usage > 100)) {
>> +ret = -EINVAL;
>> +goto out_bargs;
>> +}
>> +
>> +if ((bargs->sys.flags & BTRFS_BALANCE_ARGS_USAGE) &&
>> +(bargs->sys.usage < 0 || bargs->sys.usage > 100)) {
>> +ret = -EINVAL;
>> +goto out_bargs;
>> +}
>>  } else {
>>  bargs = NULL;
>>  }
> 
> Why not drop this hunk ...

Generally, we should check the value when it is input. If not, we might
run our program with the wrong value, and it is possible to cause unknown
problems. So I think the above chunk is necessary.

> diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
> index 6019fb2..ff86f91 100644
> --- a/fs/btrfs/volumes.c
> +++ b/fs/btrfs/volumes.c
> @@ -2334,8 +2334,13 @@ static int chunk_usage_filter(struct btrfs_fs_info 
> *fs_info, u64 chunk_offset,
>   cache = btrfs_lookup_block_group(fs_info, chunk_offset);
>   chunk_used = btrfs_block_group_used(&cache->item);
>  
> - BUG_ON(bargs->usage < 0 || bargs->usage > 100);
> - user_thresh = div_factor(cache->key.offset, bargs->usage);
> + if (bargs->usage == 0)
> + user_thresh = 0;
> + else if (bargs->usage >= 100)
> + user_thresh = cache->key.offset;
> + else
> + user_thresh = div_factor(cache->key.offset, bargs->usage);
> +
>   if (chunk_used < user_thresh)
>   ret = 0;
> 
> (diff is on top of the patch in question)
> 
> This is the most straightforward transformation I can think of.  It
> doesn't result in an unnecessary BUG_ON, keeps churn to a minimum and

agree with you.

> doesn't change the "style" of the balance ioctl.  (If I were to check
> every filter argument that way, btrfs_balance_ioctl() would be very long
> and complicated.)


I think the check in btrfs_balance_ioctl() is necessary, the reason is above.

Thanks
Miao
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 4/4] Btrfs: tivial cleanup: add space between = and the rest code

2012-09-23 Thread Wang Sheng-Hui
trivial code cleanup.

Signed-off-by: Wang Sheng-Hui 
---
 fs/btrfs/transaction.c |2 +-
 1 files changed, 1 insertions(+), 1 deletions(-)

diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index 675d813..3279df2 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -1619,7 +1619,7 @@ int btrfs_clean_old_snapshots(struct btrfs_root *root)
BTRFS_MIXED_BACKREF_REV)
ret = btrfs_drop_snapshot(root, NULL, 0, 0);
else
-   ret =btrfs_drop_snapshot(root, NULL, 1, 0);
+   ret = btrfs_drop_snapshot(root, NULL, 1, 0);
BUG_ON(ret < 0);
}
return 0;
-- 
1.7.1

--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 3/4] Btrfs: remove unnecessary cur_trans set before goto loop in join_transaction

2012-09-23 Thread Wang Sheng-Hui
In the big loop, cur_trans will be set fs_info->running_transaction
before it's used. And after kmem_cache_free it and goto loop, it will
be setup again. No need to setup it immediately after freed.

Signed-off-by: Wang Sheng-Hui 
---
 fs/btrfs/transaction.c |1 -
 1 files changed, 0 insertions(+), 1 deletions(-)

diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index 469a8b6..675d813 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -98,7 +98,6 @@ loop:
 * to redo the trans_no_join checks above
 */
kmem_cache_free(btrfs_transaction_cachep, cur_trans);
-   cur_trans = fs_info->running_transaction;
goto loop;
} else if (fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) {
spin_unlock(&fs_info->trans_lock);
-- 
1.7.1

--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 2/4] Btrfs: trivial cleanup: setup ret with 0 directly in btrfs_wait_for_commit

2012-09-23 Thread Wang Sheng-Hui
trivial code cleanup.

Signed-off-by: Wang Sheng-Hui 
---
 fs/btrfs/transaction.c |3 +--
 1 files changed, 1 insertions(+), 2 deletions(-)

diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index 8fcc501..469a8b6 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -420,9 +420,8 @@ static noinline void wait_for_commit(struct btrfs_root 
*root,
 int btrfs_wait_for_commit(struct btrfs_root *root, u64 transid)
 {
struct btrfs_transaction *cur_trans = NULL, *t;
-   int ret;
+   int ret = 0;
 
-   ret = 0;
if (transid) {
if (transid <= root->fs_info->last_trans_committed)
goto out;
-- 
1.7.1

--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 1/4] Btrfs: cleanup: return the ret value of __btrfs_end_transaction directly in btrfs_end_transaction_

2012-09-23 Thread Wang Sheng-Hui
No need to use specific var to record the return value of
__btrfs_end_transaction and check if it is not zero.
Just return the result directly as btrfs_end_transaction_dmeta.

Signed-off-by: Wang Sheng-Hui 
---
 fs/btrfs/transaction.c |   21 +++--
 1 files changed, 3 insertions(+), 18 deletions(-)

diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index 27c2600..8fcc501 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -602,34 +602,19 @@ static int __btrfs_end_transaction(struct 
btrfs_trans_handle *trans,
 int btrfs_end_transaction(struct btrfs_trans_handle *trans,
  struct btrfs_root *root)
 {
-   int ret;
-
-   ret = __btrfs_end_transaction(trans, root, 0, 1);
-   if (ret)
-   return ret;
-   return 0;
+   return __btrfs_end_transaction(trans, root, 0, 1);
 }
 
 int btrfs_end_transaction_throttle(struct btrfs_trans_handle *trans,
   struct btrfs_root *root)
 {
-   int ret;
-
-   ret = __btrfs_end_transaction(trans, root, 1, 1);
-   if (ret)
-   return ret;
-   return 0;
+   return __btrfs_end_transaction(trans, root, 1, 1);
 }
 
 int btrfs_end_transaction_nolock(struct btrfs_trans_handle *trans,
 struct btrfs_root *root)
 {
-   int ret;
-
-   ret = __btrfs_end_transaction(trans, root, 0, 0);
-   if (ret)
-   return ret;
-   return 0;
+   return __btrfs_end_transaction(trans, root, 0, 0);
 }
 
 int btrfs_end_transaction_dmeta(struct btrfs_trans_handle *trans,
-- 
1.7.1

--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: crash in read_extent_buffer+0xb7/0xfb

2012-09-23 Thread Marc MERLIN
On Thu, Sep 20, 2012 at 08:46:52PM -0700, Marc MERLIN wrote:
> On Thu, Sep 20, 2012 at 10:17:47AM -0700, Marc MERLIN wrote:
> > I had a btrfs built on top of 5 drives (dmcrypt devices).
> > 
> > The drive then died while I was writing to the filesystem and my system
> > crashed and rebooted:
> > 
> > [384555.534020] sd 10:0:0:0: rejecting I/O to offline device
> > 
> > [384555.535057] sd 10:0:0:0: rejecting I/O to offline device
> > 
> > [384556.666885] [ cut here ]
> > 
> > [384556.667909] sd 10:0:0:0: [sdj] Synchronizing SCSI cache 
> > 
> > [384556.677509] kernel BUG at fs/btrfs/ctree.c:3451!
> > 
> > [384556.682551] invalid opcode:  [#1] PREEMPT SMP   
> > 
> > [384556.687878] CPU 2   
> > 
> > 
>  
> Oh my, now I'm trying again with a new drive, and a big cp from an
> existing array to a new one dies with:
> [32042.079411] [ cut here ]   
>   
> [32042.085799] kernel BUG at fs/btrfs/extent_io.c:1884!   
>   
> [32042.092528] invalid opcode:  [#1] PREEMPT SMP  
>   
> [32042.099227] CPU 1  
>   
> [32042.101095] Modules linked in:[32042.105950]  raid456 async_raid6_recov 
> async
> _pq raid6_pq async_xor xor async_memcpy async_tx ppdev lp tun autofs4 
> kl5kusb105
>  ftdi_sio keyspan nfsd nfs lockd fscache auth_rpcgss nfs_acl sunrpc 
> rc_ati_x10 s
> nd_timer i915 usbserial snd drm_kms_helper eeepc_wmi drm ati_remote asus_wmi 
> rc_
> core sparse_keymap

I had a different crash while copying to a btrfs 5 disk array. Not sure if this 
is
also fixed too, but pasting just in case.
 
[207025.055956] btrfs: bdev /dev/mapper/crypt_sdo1 errs: wr 46779, rd 0, flush 
7 6, corrupt 0, gen 0
[207055.067267] btrfs bad mapping eb start 8653217792 len 4096, wanted 
184467440 50581869634 4
[207055.078099] general protection fault:  [#1] PREEMPT SMP
[207055.085213] CPU 3
[207055.087173] Modules linked in:[207055.091512]  raid456 async_raid6_recov asy
nc_pq raid6_pq async_xor xor async_memcpy async_tx ppdev lp tun autofs4 kl5kusb1
05 ftdi_sio keyspan nfsd nfs lockd fscache auth_rpcgss nfs_acl sunrpc ipt_REJECT
 xt_state xt_tcpudp xt_LOG iptable_mangle iptable_filter deflate ctr twofish_gen
eric twofish_x86_64_3way twofish_x86_64 twofish_common camellia_generic camellia
_x86_64 serpent_sse2_x86_64 lrw serpent_generic xts gf128mul blowfish_generic bl
owfish_x86_64 blowfish_common cast5 des_generic xcbc rmd160 sha512_generic crypt
o_null af_key xfrm_algo dm_crypt dm_mirror dm_region_hash dm_log aes_x86_64 fuse
 lm85 hwmon_vid dm_snapshot dm_mod iptable_nat ip_tables nf_conntrack_ftp ipt_MA
SQUERADE nf_nat nf_conntrack_ipv4 nf_defrag_ipv4 x_tables nf_conntrack sg st snd
_pcm_oss snd_mixer_oss snd_hda_codec_hdmi snd_hda_codec_realtek snd_cmipci gamep
ort rc_ati_x10 snd_opl3_lib snd_mpu401_uart pl2303 ati_remote rc_core snd_seq_mi
di snd_seq_midi_event snd_seq usbserial snd_rawmidi kvm_intel kvm snd_seq_device
 snd_hda_intel[207055.193933]  i915 snd_hda_codec drm_kms_helper snd_hwdep snd_p
cm drm snd_timer eeepc_wmi asus_wmi sparse_keymap rfkill snd i2c_i801 parport_pc
 acpi_cpufreq i2c_algo_bit microcode crc32c_intel ehci_hcd xhci_hcd ghash_clmuln
i_intel pci_hotplug wmi cryptd r8169 snd_page_alloc soundcore pcspkr tpm_tis mpe
rf tpm evdev tpm_bios usbcore i2c_core parport mii lpc_ich mei sata_sil24 corete
mp sata_mv fan thermal processor button video thermal_sys usb_common [last unloa
ded: kl5kusb105]

[207055.244330] Pid: 6456, comm: btrfs-transacti Tainted: GW
3.5.3-amd64-preempt-noide-20120903 #1 System manufacturer System Product 
Name/P8H67-M PRO
[207055.261478] RIP: 0010:[]  [] 
read_extent_buffer+0xb7/0xfb
[207055.271621] RSP: 0018:880105ff3880  EFLAGS: 00010202
[207055.278516] RAX: 0bbe RBX: 8800405ba1f8 RCX: 
8800405ba2c8
[207055.287257] RDX: 880105ff38ec RSI: 0086 RDI: 
880105ff38ec
[207055.295967] RBP: 880105ff38c0 R08: 007ffd4ebdc8 R09: 
1600
[207055.304674] R10: 1000 R11: 6db6db6db6db6db7 R12: 
0004
[207055.313356] R13: 8800 R14: fffa9d7b9446 R15: 
044 2
[207055.322032] FS:  () GS:88011f38() 
knlGS:
[207055.331692] CS:  0010 DS:  ES:  CR0: 8005003b
[207055.339014] CR2: f7021000 CR3: 01a0c000 CR4: 
000407e0
[207055.347715] DR0:  DR1:  DR2: 

[207055.356403] DR3:  DR6: 0ff0 DR7: 
0400
[207055.365092] Process btrfs-transacti (pid: 6456, threadinfo 
880105ff2000,task 880105e7e600)
[207055.376219] Stack:
[207055.380369]  fffa9d7b

[RFC v2 10/10] vfs: add documentation

2012-09-23 Thread zwu . kernel
From: Zhi Yong Wu 

Signed-off-by: Zhi Yong Wu 
---
 Documentation/filesystems/hot_tracking.txt |  106 
 1 files changed, 106 insertions(+), 0 deletions(-)
 create mode 100644 Documentation/filesystems/hot_tracking.txt

diff --git a/Documentation/filesystems/hot_tracking.txt 
b/Documentation/filesystems/hot_tracking.txt
new file mode 100644
index 000..340df45
--- /dev/null
+++ b/Documentation/filesystems/hot_tracking.txt
@@ -0,0 +1,106 @@
+Hot Data Tracking
+
+Introduction
+---
+
+  The feature adds experimental support for tracking data temperature
+information in VFS layer.  Essentially, this means maintaining some key
+stats(like number of reads/writes, last read/write time, frequency of
+reads/writes), then distilling those numbers down to a single
+"temperature" value that reflects what data is "hot," and using that
+temperature to move data to SSDs.
+
+  The long-term goal of the feature is to allow some FSs,
+e.g. Btrfs to intelligently utilize SSDs in a heterogenous volume.
+Incidentally, this project has been motivated by
+the Project Ideas page on the Btrfs wiki.
+
+  Of course, users are warned not to run this code outside of development
+environments. These patches are EXPERIMENTAL, and as such they might eat
+your data and/or memory. That said, the code should be relatively safe
+when the hottrack mount option are disabled.
+
+Motivation
+---
+
+  The overall goal of enabling hot data relocation to SSD has been
+motivated by the Project Ideas page on the Btrfs wiki at
+.
+It will divide into two steps. VFS provide hot data tracking function
+while specific FS will provide hot data relocation function.
+So as the first step of this goal, it is hoped that the patchset
+for hot data tracking will eventually mature into VFS.
+
+  This is essentially the traditional cache argument: SSD is fast and
+expensive; HDD is cheap but slow. ZFS, for example, can already take
+advantage of SSD caching. Btrfs should also be able to take advantage of
+hybrid storage without many broad, sweeping changes to existing code.
+
+Main Parts Description
+---
+
+These include the following parts:
+* Hooks in existing vfs functions to track data access frequency
+* New rbtrees for tracking access frequency of inodes and sub-file
+ranges (hot_rb.c)
+The relationship between super_block and rbtree is as below:
+super_block->s_hotinfo.hot_inode_tree
+In include/linux/fs.h, one struct hot_info s_hotinfo is added to
+super_block struct. Each FS instance can find hot tracking info
+s_hotinfo via its super_block. In this hot_info, it store a lot of hot
+tracking info such as hot_inode_tree, inode and range hash list, etc.
+* A hash list for indexing data by its temperature (hot_hash.c)
+* A debugfs interface for dumping data from the rbtrees (hot_debugfs.c)
+* A background kthread for updating inode heat info
+* Mount options for enabling temperature tracking(-o hottrack,
+default mean disabled) (hot_track.c)
+* An ioctl to retrieve the frequency information collected for a certain
+file
+* Ioctls to enable/disable frequency tracking per inode.
+
+Git Development Tree
+---
+
+  The feature is still on development and review, so if you're interested,
+you can pull from the git repository at the following location:
+  https://github.com/wuzhy/kernel.git hot_tracking
+  git://github.com/wuzhy/kernel.git hot_tracking
+
+Usage Example
+---
+To use hot tracking, you should mount like this:
+
+$ mount -o hottrack /dev/sdb /mnt
+[ 1505.894078] device label test devid 1 transid 29 /dev/sdb
+[ 1505.952977] btrfs: disk space caching is enabled
+[ 1506.069678] vfs: turning on hot data tracking
+
+Mount debugfs at first:
+
+$ mount -t debugfs none /sys/kernel/debug
+$ ls -l /sys/kernel/debug/vfs_hotdata/
+total 0
+drwxr-xr-x 2 root root 0 Aug  8 04:40 sdb
+$ ls -l /sys/kernel/debug/vfs_hotdata/sdb
+total 0
+-rw-r--r-- 1 root root 0 Aug  8 04:40 inode_data
+-rw-r--r-- 1 root root 0 Aug  8 04:40 range_data
+
+View information about hot tracking from debugfs:
+
+$ echo "hot tracking test" > /mnt/file
+$ cat /sys/kernel/debug/hot_track/sdb/inode_data
+inode #279, reads 0, writes 1, avg read time 18446744073709551615,
+avg write time 5251566408153596, temp 109
+$ cat /sys/kernel/debug/hot_track/sdb/range_data
+inode #279, range start 0 (range len 1048576) reads 0, writes 1,
+avg read time 18446744073709551615, avg write time 1128690176623144209, temp 64
+
+$ echo "hot data tracking test" >> /mnt/file
+$ cat /sys/kernel/debug/hot_track/sdb/inode_da

[RFC v2 04/10] vfs: add init and exit support

2012-09-23 Thread zwu . kernel
From: Zhi Yong Wu 

  Add initialization function to create some
key data structures when hot tracking is enabled;
Clean up them when hot tracking is disabled

Signed-off-by: Zhi Yong Wu 
---
 fs/hot_tracking.c |   60 +
 fs/hot_tracking.h |2 +
 fs/namespace.c|4 +++
 fs/super.c|6 +
 4 files changed, 72 insertions(+), 0 deletions(-)

diff --git a/fs/hot_tracking.c b/fs/hot_tracking.c
index f97e8a6..fa89f70 100644
--- a/fs/hot_tracking.c
+++ b/fs/hot_tracking.c
@@ -135,6 +135,51 @@ static void hot_rb_free_hot_range_item(struct 
hot_range_item *hr)
}
 }
 
+static int hot_rb_remove_hot_inode_item(struct hot_inode_tree *tree,
+struct hot_inode_item *he)
+{
+int ret = 0;
+rb_erase(&he->rb_node, &tree->map);
+he->in_tree = 0;
+return ret;
+}
+
+static int hot_rb_remove_hot_range_item(struct hot_range_tree *tree,
+struct hot_range_item *hr)
+{
+int ret = 0;
+rb_erase(&hr->rb_node, &tree->map);
+hr->in_tree = 0;
+return ret;
+}
+
+/* Frees the entire hot_inode_tree. */
+static void hot_rb_inode_tree_free(struct hot_info *root)
+{
+   struct rb_node *node, *node2;
+   struct hot_inode_item *he;
+   struct hot_range_item *hr;
+
+   /* Free hot inode and range trees on fs root */
+   node = rb_first(&root->hot_inode_tree.map);
+
+   while (node) {
+   he = rb_entry(node, struct hot_inode_item, rb_node);
+
+   node2 = rb_first(&he->hot_range_tree.map);
+   while (node2) {
+   hr = rb_entry(node2, struct hot_range_item, rb_node);
+   hot_rb_remove_hot_range_item(&he->hot_range_tree, hr);
+   hot_rb_free_hot_range_item(hr);
+   node2 = rb_first(&he->hot_range_tree.map);
+   }
+
+   hot_rb_remove_hot_inode_item(&root->hot_inode_tree, he);
+   hot_rb_free_hot_inode_item(he);
+   node = rb_first(&root->hot_inode_tree.map);
+   }
+}
+
 static struct rb_node *hot_rb_insert_hot_inode_item(struct rb_root *root,
unsigned long inode_num,
struct rb_node *node)
@@ -507,3 +552,18 @@ void __init hot_track_cache_init(void)
if (hot_rb_item_cache_init())
return;
 }
+
+/*
+ * Initialize the data structures for hot data tracking.
+ */
+void hot_track_init(struct super_block *sb, const char *name)
+{
+   sb->s_hotinfo.mount_opt |= HOT_MOUNT_HOT_TRACK;
+   hot_rb_inode_tree_init(&sb->s_hotinfo.hot_inode_tree);
+}
+
+void hot_track_exit(struct super_block *sb)
+{
+   sb->s_hotinfo.mount_opt &= ~HOT_MOUNT_HOT_TRACK;
+   hot_rb_inode_tree_free(&sb->s_hotinfo);
+}
diff --git a/fs/hot_tracking.h b/fs/hot_tracking.h
index 6bd09eb..3a8d398 100644
--- a/fs/hot_tracking.h
+++ b/fs/hot_tracking.h
@@ -39,5 +39,7 @@ void hot_rb_update_freqs(struct inode *inode, u64 start, u64 
len,
 
 bool hot_track_parse_options(char *options);
 void __init hot_track_cache_init(void);
+void hot_track_init(struct super_block *sb, const char *name);
+void hot_track_exit(struct super_block *sb);
 
 #endif /* __HOT_TRACKING__ */
diff --git a/fs/namespace.c b/fs/namespace.c
index 4d31f73..55006c8 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -20,6 +20,7 @@
 #include/* get_fs_root et.al. */
 #include /* fsnotify_vfsmount_delete */
 #include 
+#include "hot_tracking.h"
 #include "pnode.h"
 #include "internal.h"
 
@@ -1215,6 +1216,9 @@ static int do_umount(struct mount *mnt, int flags)
return retval;
}
 
+   if (sb->s_hotinfo.mount_opt & HOT_MOUNT_HOT_TRACK)
+   hot_track_exit(sb);
+
down_write(&namespace_sem);
br_write_lock(&vfsmount_lock);
event++;
diff --git a/fs/super.c b/fs/super.c
index 7eb3b0c..0999d5c 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -1153,6 +1153,9 @@ mount_fs(struct file_system_type *type, int flags, const 
char *name, void *data)
WARN_ON(sb->s_bdi == &default_backing_dev_info);
sb->s_flags |= MS_BORN;
 
+   if (hottrack)
+   hot_track_init(sb, name);
+
error = security_sb_kern_mount(sb, flags, secdata);
if (error)
goto out_sb;
@@ -1170,6 +1173,9 @@ mount_fs(struct file_system_type *type, int flags, const 
char *name, void *data)
free_secdata(secdata);
return root;
 out_sb:
+   if (hottrack)
+   hot_track_exit(sb);
+
dput(root);
deactivate_locked_super(sb);
 out_free_secdata:
-- 
1.7.6.5

--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[RFC v2 03/10] vfs: add one new mount option '-o hottrack'

2012-09-23 Thread zwu . kernel
From: Zhi Yong Wu 

  Introduce one new mount option '-o hottrack',
and add its parsing support.
  Its usage looks like:
   mount -o hottrack
   mount -o nouser,hottrack
   mount -o nouser,hottrack,loop
   mount -o hottrack,nouser

Signed-off-by: Zhi Yong Wu 
---
 fs/hot_tracking.c|   34 ++
 fs/hot_tracking.h|1 +
 fs/super.c   |5 +
 include/linux/hot_tracking.h |7 +++
 4 files changed, 47 insertions(+), 0 deletions(-)

diff --git a/fs/hot_tracking.c b/fs/hot_tracking.c
index 52ed926..f97e8a6 100644
--- a/fs/hot_tracking.c
+++ b/fs/hot_tracking.c
@@ -465,6 +465,40 @@ void hot_rb_update_freqs(struct inode *inode, u64 start,
 }
 
 /*
+ * Regular mount options parser for -hottrack option.
+ * return false if no -hottrack is specified;
+ * otherwise return true. And the -hottrack will be
+ * removed from options.
+ */
+bool hot_track_parse_options(char *options)
+{
+   long len;
+   char *p;
+   static char opts_hot[] = "hottrack";
+
+   if (!options)
+   return false;
+
+   p = strstr(options, opts_hot);
+   if (!p)
+   return false;
+
+   while (p) {
+   len = options + strlen(options) - (p + strlen(opts_hot));
+   if (len == 0) {
+   options[0] = '\0';
+   break;
+   }
+
+   memmove(p, p + strlen(opts_hot) + 1, len);
+   p = strstr(options, opts_hot);
+   }
+
+   printk(KERN_INFO "vfs: turning on hot data tracking\n");
+   return true;
+}
+
+/*
  * Initialize kmem cache for hot_inode_item
  * and hot_range_item
  */
diff --git a/fs/hot_tracking.h b/fs/hot_tracking.h
index 2ba29e4..6bd09eb 100644
--- a/fs/hot_tracking.h
+++ b/fs/hot_tracking.h
@@ -37,6 +37,7 @@ void hot_rb_free_hot_inode_item(struct hot_inode_item *he);
 void hot_rb_update_freqs(struct inode *inode, u64 start, u64 len,
int rw);
 
+bool hot_track_parse_options(char *options);
 void __init hot_track_cache_init(void);
 
 #endif /* __HOT_TRACKING__ */
diff --git a/fs/super.c b/fs/super.c
index 0902cfa..7eb3b0c 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -34,6 +34,7 @@
 #include 
 #include 
 #include 
+#include "hot_tracking.h"
 #include "internal.h"
 
 
@@ -1125,6 +1126,7 @@ mount_fs(struct file_system_type *type, int flags, const 
char *name, void *data)
struct dentry *root;
struct super_block *sb;
char *secdata = NULL;
+   bool hottrack = false;
int error = -ENOMEM;
 
if (data && !(type->fs_flags & FS_BINARY_MOUNTDATA)) {
@@ -1137,6 +1139,9 @@ mount_fs(struct file_system_type *type, int flags, const 
char *name, void *data)
goto out_free_secdata;
}
 
+   if (data && hot_track_parse_options(data))
+   hottrack = true;
+
root = type->mount(type, flags, name, data);
if (IS_ERR(root)) {
error = PTR_ERR(root);
diff --git a/include/linux/hot_tracking.h b/include/linux/hot_tracking.h
index a566f91..bb2a41c 100644
--- a/include/linux/hot_tracking.h
+++ b/include/linux/hot_tracking.h
@@ -20,6 +20,11 @@
 #include 
 #include 
 
+/*
+ * Flags for hot data tracking mount options.
+ */
+#define HOT_MOUNT_HOT_TRACK(1 << 0)
+
 /* A tree that sits on the hot_info */
 struct hot_inode_tree {
struct rb_root map;
@@ -89,6 +94,8 @@ struct hot_range_item {
 };
 
 struct hot_info {
+   unsigned long mount_opt;
+
/* red-black tree that keeps track of fs-wide hot data */
struct hot_inode_tree hot_inode_tree;
 };
-- 
1.7.6.5

--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[RFC v2 02/10] vfs: add support for updating access frequency

2012-09-23 Thread zwu . kernel
From: Zhi Yong Wu 

  Add some utils helpers to update access frequencies
for one file or its range.

Signed-off-by: Zhi Yong Wu 
---
 fs/hot_tracking.c |  359 +
 fs/hot_tracking.h |   15 +++
 2 files changed, 374 insertions(+), 0 deletions(-)

diff --git a/fs/hot_tracking.c b/fs/hot_tracking.c
index 173054b..52ed926 100644
--- a/fs/hot_tracking.c
+++ b/fs/hot_tracking.c
@@ -106,6 +106,365 @@ inode_err:
 }
 
 /*
+ * Drops the reference out on hot_inode_item by one and free the structure
+ * if the reference count hits zero
+ */
+void hot_rb_free_hot_inode_item(struct hot_inode_item *he)
+{
+   if (!he)
+   return;
+
+   if (atomic_dec_and_test(&he->refs.refcount)) {
+   WARN_ON(he->in_tree);
+   kmem_cache_free(hot_inode_item_cache, he);
+   }
+}
+
+/*
+ * Drops the reference out on hot_range_item by one and free the structure
+ * if the reference count hits zero
+ */
+static void hot_rb_free_hot_range_item(struct hot_range_item *hr)
+{
+   if (!hr)
+   return;
+
+   if (atomic_dec_and_test(&hr->refs.refcount)) {
+   WARN_ON(hr->in_tree);
+   kmem_cache_free(hot_range_item_cache, hr);
+   }
+}
+
+static struct rb_node *hot_rb_insert_hot_inode_item(struct rb_root *root,
+   unsigned long inode_num,
+   struct rb_node *node)
+{
+   struct rb_node **p = &root->rb_node;
+   struct rb_node *parent = NULL;
+   struct hot_inode_item *entry;
+
+   /* walk tree to find insertion point */
+   while (*p) {
+   parent = *p;
+   entry = rb_entry(parent, struct hot_inode_item, rb_node);
+
+   if (inode_num < entry->i_ino)
+   p = &(*p)->rb_left;
+   else if (inode_num > entry->i_ino)
+   p = &(*p)->rb_right;
+   else
+   return parent;
+   }
+
+   entry = rb_entry(node, struct hot_inode_item, rb_node);
+   entry->in_tree = 1;
+   rb_link_node(node, parent, p);
+   rb_insert_color(node, root);
+
+   return NULL;
+}
+
+static u64 hot_rb_range_end(struct hot_range_item *hr)
+{
+   if (hr->start + hr->len < hr->start)
+   return (u64)-1;
+
+   return hr->start + hr->len - 1;
+}
+
+static struct rb_node *hot_rb_insert_hot_range_item(struct rb_root *root,
+   u64 start,
+   struct rb_node *node)
+{
+   struct rb_node **p = &root->rb_node;
+   struct rb_node *parent = NULL;
+   struct hot_range_item *entry;
+
+   /* ensure start is on a range boundary */
+   start = start & RANGE_SIZE_MASK;
+   /* walk tree to find insertion point */
+   while (*p) {
+   parent = *p;
+   entry = rb_entry(parent, struct hot_range_item, rb_node);
+
+   if (start < entry->start)
+   p = &(*p)->rb_left;
+   else if (start >= hot_rb_range_end(entry))
+   p = &(*p)->rb_right;
+   else
+   return parent;
+   }
+
+   entry = rb_entry(node, struct hot_range_item, rb_node);
+   entry->in_tree = 1;
+   rb_link_node(node, parent, p);
+   rb_insert_color(node, root);
+
+   return NULL;
+}
+
+/*
+ * Add a hot_inode_item to a hot_inode_tree. If the tree already contains
+ * an item with the index given, return -EEXIST
+ */
+static int hot_rb_add_hot_inode_item(struct hot_inode_tree *tree,
+   struct hot_inode_item *he)
+{
+   int ret = 0;
+   struct rb_node *rb;
+
+   rb = hot_rb_insert_hot_inode_item(
+   &tree->map, he->i_ino, &he->rb_node);
+   if (rb) {
+   ret = -EEXIST;
+   goto out;
+   }
+
+   kref_get(&he->refs);
+
+out:
+   return ret;
+}
+
+/*
+ * Add a hot_range_item to a hot_range_tree. If the tree already contains
+ * an item with the index given, return -EEXIST
+ *
+ * Also optionally aggresively merge ranges (currently disabled)
+ */
+static int hot_rb_add_hot_range_item(struct hot_range_tree *tree,
+   struct hot_range_item *hr)
+{
+   int ret = 0;
+   struct rb_node *rb;
+
+   rb = hot_rb_insert_hot_range_item(
+   &tree->map, hr->start, &hr->rb_node);
+   if (rb) {
+   ret = -EEXIST;
+   goto out;
+   }
+
+   kref_get(&hr->refs);
+
+out:
+   return ret;
+}
+
+/*
+ * Lookup a hot_inode_item in the hot_inode_tree with the given index
+ * (inode_num)
+ */
+struct hot_inode_item
+*hot_rb_lookup_hot_inode_item(struct hot_inode_tree *tree,
+   unsigned long inode_num)
+{
+   struct rb_node **p = &(tree->map.rb_node);
+   struct rb_node *parent = N

[RFC v2 05/10] vfs: introduce one hash table

2012-09-23 Thread zwu . kernel
From: Zhi Yong Wu 

  Adds a hash table structure which contains
a lot of hash list and is used to efficiently
look up the data temperature of a file or its
ranges.
  In each hash list of hash table, the hash node
will keep track of temperature info.

Signed-off-by: Zhi Yong Wu 
---
 fs/hot_tracking.c|   77 -
 include/linux/hot_tracking.h |   35 +++
 2 files changed, 110 insertions(+), 2 deletions(-)

diff --git a/fs/hot_tracking.c b/fs/hot_tracking.c
index fa89f70..5f96442 100644
--- a/fs/hot_tracking.c
+++ b/fs/hot_tracking.c
@@ -16,6 +16,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -24,6 +25,9 @@
 /* kmem_cache pointers for slab caches */
 static struct kmem_cache *hot_inode_item_cache;
 static struct kmem_cache *hot_range_item_cache;
+static struct kmem_cache *hot_hash_node_cache;
+
+static void hot_hash_node_init(void *_node);
 
 /*
  * Initialize the inode tree. Should be called for each new inode
@@ -57,6 +61,10 @@ void hot_rb_inode_item_init(void *_item)
memset(he, 0, sizeof(*he));
kref_init(&he->refs);
spin_lock_init(&he->lock);
+   he->heat_node = kmem_cache_alloc(hot_hash_node_cache,
+   GFP_KERNEL | GFP_NOFS);
+   hot_hash_node_init(he->heat_node);
+   he->heat_node->hot_freq_data = &he->hot_freq_data;
he->hot_freq_data.avg_delta_reads = (u64) -1;
he->hot_freq_data.avg_delta_writes = (u64) -1;
he->hot_freq_data.flags = FREQ_DATA_TYPE_INODE;
@@ -75,6 +83,10 @@ static void hot_rb_range_item_init(void *_item)
memset(hr, 0, sizeof(*hr));
kref_init(&hr->refs);
spin_lock_init(&hr->lock);
+   hr->heat_node = kmem_cache_alloc(hot_hash_node_cache,
+   GFP_KERNEL | GFP_NOFS);
+   hot_hash_node_init(hr->heat_node);
+   hr->heat_node->hot_freq_data = &hr->hot_freq_data;
hr->hot_freq_data.avg_delta_reads = (u64) -1;
hr->hot_freq_data.avg_delta_writes = (u64) -1;
hr->hot_freq_data.flags = FREQ_DATA_TYPE_RANGE;
@@ -105,6 +117,18 @@ inode_err:
return -ENOMEM;
 }
 
+static void hot_rb_inode_item_exit(void)
+{
+   if (hot_inode_item_cache)
+   kmem_cache_destroy(hot_inode_item_cache);
+}
+
+static void hot_rb_range_item_exit(void)
+{
+   if (hot_range_item_cache)
+   kmem_cache_destroy(hot_range_item_cache);
+}
+
 /*
  * Drops the reference out on hot_inode_item by one and free the structure
  * if the reference count hits zero
@@ -510,6 +534,48 @@ void hot_rb_update_freqs(struct inode *inode, u64 start,
 }
 
 /*
+ * Initialize hash node.
+ */
+static void hot_hash_node_init(void *_node)
+{
+   struct hot_hash_node *node = _node;
+
+   memset(node, 0, sizeof(*node));
+   INIT_HLIST_NODE(&node->hashnode);
+   node->hot_freq_data = NULL;
+   node->hlist = NULL;
+   spin_lock_init(&node->lock);
+   kref_init(&node->refs);
+}
+
+static int __init hot_hash_node_cache_init(void)
+{
+   hot_hash_node_cache = kmem_cache_create("hot_hash_node",
+   sizeof(struct hot_hash_node),
+   0,
+   SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD,
+   hot_hash_node_init);
+   if (!hot_hash_node_cache)
+   return -ENOMEM;
+
+   return 0;
+}
+
+/*
+ * Initialize inode/range hash lists.
+ */
+static void hot_hash_table_init(struct hot_info *root)
+{
+   int i;
+   for (i = 0; i < HEAT_HASH_SIZE; i++) {
+   root->heat_inode_hl[i].temperature = i;
+   root->heat_range_hl[i].temperature = i;
+   rwlock_init(&root->heat_inode_hl[i].rwlock);
+   rwlock_init(&root->heat_range_hl[i].rwlock);
+   }
+}
+
+/*
  * Regular mount options parser for -hottrack option.
  * return false if no -hottrack is specified;
  * otherwise return true. And the -hottrack will be
@@ -544,13 +610,18 @@ bool hot_track_parse_options(char *options)
 }
 
 /*
- * Initialize kmem cache for hot_inode_item
- * and hot_range_item
+ * Initialize kmem cache for hot_inode_item,
+ * hot_range_item and hot_hash_node
  */
 void __init hot_track_cache_init(void)
 {
if (hot_rb_item_cache_init())
return;
+
+   if (hot_hash_node_cache_init()) {
+   hot_rb_inode_item_exit();
+   hot_rb_range_item_exit();
+   }
 }
 
 /*
@@ -560,10 +631,12 @@ void hot_track_init(struct super_block *sb, const char 
*name)
 {
sb->s_hotinfo.mount_opt |= HOT_MOUNT_HOT_TRACK;
hot_rb_inode_tree_init(&sb->s_hotinfo.hot_inode_tree);
+   hot_hash_table_init(&sb->s_hotinfo);
 }
 
 void hot_track_exit(struct super_block *sb)
 {
sb->s_hotinfo.mount_opt &= ~HOT_MOUNT_HOT_TRACK;
+   hot_hash_table_free(&sb->s_hotinfo);
hot_rb_inode_tree_free(&sb->s_hotinfo);
 }
diff --git a/include/lin

[RFC v2 06/10] vfs: enable hot data tracking

2012-09-23 Thread zwu . kernel
From: Zhi Yong Wu 

  Miscellaneous features that implement hot data tracking
and generally make the hot data functions a bit more friendly.

Signed-off-by: Zhi Yong Wu 
---
 fs/direct-io.c   |   10 ++
 include/linux/hot_tracking.h |   11 +++
 mm/filemap.c |8 
 mm/page-writeback.c  |   21 +
 mm/readahead.c   |9 +
 5 files changed, 59 insertions(+), 0 deletions(-)

diff --git a/fs/direct-io.c b/fs/direct-io.c
index f86c720..3773f44 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -37,6 +37,7 @@
 #include 
 #include 
 #include 
+#include "hot_tracking.h"
 
 /*
  * How many user pages to map in one call to get_user_pages().  This determines
@@ -1297,6 +1298,15 @@ __blockdev_direct_IO(int rw, struct kiocb *iocb, struct 
inode *inode,
prefetch(bdev->bd_queue);
prefetch((char *)bdev->bd_queue + SMP_CACHE_BYTES);
 
+   /* Hot data tracking */
+   if (TRACK_THIS_INODE(iocb->ki_filp->f_mapping->host)
+   && iov_length(iov, nr_segs) > 0) {
+   hot_rb_update_freqs(iocb->ki_filp->f_mapping->host,
+   (u64)offset,
+   (u64)iov_length(iov, nr_segs),
+   rw & WRITE);
+   }
+
return do_blockdev_direct_IO(rw, iocb, inode, bdev, iov, offset,
 nr_segs, get_block, end_io,
 submit_io, flags);
diff --git a/include/linux/hot_tracking.h b/include/linux/hot_tracking.h
index 635ffb6..bc41f94 100644
--- a/include/linux/hot_tracking.h
+++ b/include/linux/hot_tracking.h
@@ -28,6 +28,14 @@
  */
 #define HOT_MOUNT_HOT_TRACK(1 << 0)
 
+/* Hot data tracking -- guard macros */
+#define TRACKING_HOT_TRACK(root) \
+   (root->s_hotinfo.mount_opt & HOT_MOUNT_HOT_TRACK)
+
+#define TRACK_THIS_INODE(inode) \
+   ((TRACKING_HOT_TRACK(inode->i_sb)) && \
+   !(inode->i_flags & S_NOHOTDATATRACK))
+
 /* A tree that sits on the hot_info */
 struct hot_inode_tree {
struct rb_root map;
@@ -135,4 +143,7 @@ struct hot_info {
struct hot_hash_head heat_range_hl[HEAT_HASH_SIZE];
 };
 
+extern void hot_rb_update_freqs(struct inode *inode,
+   u64 start, u64 len, int rw);
+
 #endif  /* _LINUX_HOTTRACK_H */
diff --git a/mm/filemap.c b/mm/filemap.c
index 3843445..8b1ecff 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -33,6 +33,7 @@
 #include  /* for BUG_ON(!in_atomic()) only */
 #include 
 #include 
+#include 
 #include "internal.h"
 
 /*
@@ -1224,6 +1225,13 @@ readpage:
 * PG_error will be set again if readpage fails.
 */
ClearPageError(page);
+
+   /* Hot data tracking */
+   if (TRACK_THIS_INODE(filp->f_mapping->host))
+   hot_rb_update_freqs(filp->f_mapping->host,
+   (u64)page->index << PAGE_CACHE_SHIFT,
+   PAGE_CACHE_SIZE, 0);
+
/* Start the actual read. The read will unlock the page. */
error = mapping->a_ops->readpage(filp, page);
 
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index 5ad5ce2..552c861 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -35,6 +35,7 @@
 #include  /* __set_page_dirty_buffers */
 #include 
 #include 
+#include 
 #include 
 
 /*
@@ -1895,13 +1896,33 @@ EXPORT_SYMBOL(generic_writepages);
 int do_writepages(struct address_space *mapping, struct writeback_control *wbc)
 {
int ret;
+   pgoff_t start = 0;
+   u64 prev_count = 0, count = 0;
 
if (wbc->nr_to_write <= 0)
return 0;
+
+   /* Hot data tracking */
+   if (TRACK_THIS_INODE(mapping->host)
+   && wbc->range_cyclic) {
+   start = mapping->writeback_index << PAGE_CACHE_SHIFT;
+   prev_count = (u64)wbc->nr_to_write;
+   }
+
if (mapping->a_ops->writepages)
ret = mapping->a_ops->writepages(mapping, wbc);
else
ret = generic_writepages(mapping, wbc);
+
+   /* Hot data tracking */
+   if (TRACK_THIS_INODE(mapping->host)
+   && wbc->range_cyclic) {
+   count = prev_count - (u64)wbc->nr_to_write;
+   if (count)
+   hot_rb_update_freqs(mapping->host, (u64)start,
+   count * PAGE_CACHE_SIZE, 1);
+   }
+
return ret;
 }
 
diff --git a/mm/readahead.c b/mm/readahead.c
index ea8f8fa..7010fc4 100644
--- a/mm/readahead.c
+++ b/mm/readahead.c
@@ -19,6 +19,7 @@
 #include 
 #include 
 #include 
+#include 
 
 /*
  * Initialise a struct file's readahead state.  Assumes that the caller has
@@ -138,6 +139,14 @@ static int read_pages(struct address_space *mapping, 
struct file *filp,
 out:
blk_finish_plug(&plug);
 
+   /* Hot data 

[RFC v2 09/10] vfs: add debugfs support

2012-09-23 Thread zwu . kernel
From: Zhi Yong Wu 

  Add a /sys/kernel/debug/hot_track// directory for each
volume that contains two files. The first, `inode_data', contains the
heat information for inodes that have been brought into the hot data map
structures. The second, `range_data', contains similar information for
subfile ranges.

Signed-off-by: Zhi Yong Wu 
---
 fs/hot_tracking.c |  466 +
 fs/hot_tracking.h |   40 +
 fs/namespace.c|6 +
 3 files changed, 512 insertions(+), 0 deletions(-)

diff --git a/fs/hot_tracking.c b/fs/hot_tracking.c
index fd11695..6aeabad 100644
--- a/fs/hot_tracking.c
+++ b/fs/hot_tracking.c
@@ -22,6 +22,9 @@
 #include 
 #include 
 #include 
+#include 
+#include 
+#include 
 #include "hot_tracking.h"
 
 /* kmem_cache pointers for slab caches */
@@ -29,6 +32,13 @@ static struct kmem_cache *hot_inode_item_cache;
 static struct kmem_cache *hot_range_item_cache;
 static struct kmem_cache *hot_hash_node_cache;
 
+/* list to keep track of each mounted volumes debugfs_vol_data */
+static struct list_head hot_debugfs_vol_data_list;
+/* lock for debugfs_vol_data_list */
+static spinlock_t hot_debugfs_data_list_lock;
+/* pointer to top level debugfs dentry */
+static struct dentry *hot_debugfs_root_dentry;
+
 static struct task_struct *hot_track_temperature_update_kthread;
 
 static void hot_hash_node_init(void *_node);
@@ -1004,6 +1014,460 @@ static int hot_hash_temperature_update_kthread(void 
*arg)
return 0;
 }
 
+static int hot_debugfs_copy(struct debugfs_vol_data *data, char *msg, int len)
+{
+   struct lstring *debugfs_log = data->debugfs_log;
+   uint new_log_alloc_size;
+   char *new_log;
+   static char err_msg[] = "No more memory!\n";
+
+   if (len >= data->log_alloc_size - debugfs_log->len) {
+   /* Not enough room in the log buffer for the new message. */
+   /* Allocate a bigger buffer. */
+   new_log_alloc_size = data->log_alloc_size + LOG_PAGE_SIZE;
+   new_log = vmalloc(new_log_alloc_size);
+
+   if (new_log) {
+   memcpy(new_log, debugfs_log->str, debugfs_log->len);
+   memset(new_log + debugfs_log->len, 0,
+   new_log_alloc_size - debugfs_log->len);
+   vfree(debugfs_log->str);
+   debugfs_log->str = new_log;
+   data->log_alloc_size = new_log_alloc_size;
+   } else {
+   WARN_ON(1);
+   if (data->log_alloc_size - debugfs_log->len) {
+   strlcpy(debugfs_log->str +
+   debugfs_log->len,
+   err_msg,
+   data->log_alloc_size - debugfs_log->len);
+   debugfs_log->len +=
+   min((typeof(debugfs_log->len))
+   sizeof(err_msg),
+   ((typeof(debugfs_log->len))
+   data->log_alloc_size - debugfs_log->len));
+   }
+   return 0;
+   }
+   }
+
+   memcpy(debugfs_log->str + debugfs_log->len, data->log_work_buff, len);
+   debugfs_log->len += (unsigned long) len;
+
+   return len;
+}
+
+/* Returns the number of bytes written to the log. */
+static int hot_debugfs_log(struct debugfs_vol_data *data, const char *fmt, ...)
+{
+   struct lstring *debugfs_log = data->debugfs_log;
+   va_list args;
+   int len;
+   static char trunc_msg[] =
+   "The next message has been truncated.\n";
+
+   if (debugfs_log->str == NULL)
+   return -1;
+
+   spin_lock(&data->log_lock);
+
+   va_start(args, fmt);
+   len = vsnprintf(data->log_work_buff,
+   sizeof(data->log_work_buff), fmt, args);
+   va_end(args);
+
+   if (len >= sizeof(data->log_work_buff)) {
+   hot_debugfs_copy(data, trunc_msg, sizeof(trunc_msg));
+   }
+
+   len = hot_debugfs_copy(data, data->log_work_buff, len);
+   spin_unlock(&data->log_lock);
+
+   return len;
+}
+
+/* initialize a log corresponding to a fs volume */
+static int hot_debugfs_log_init(struct debugfs_vol_data *data)
+{
+   int err = 0;
+   struct lstring *debugfs_log = data->debugfs_log;
+
+   spin_lock(&data->log_lock);
+   debugfs_log->str = vmalloc(INIT_LOG_ALLOC_SIZE);
+   if (debugfs_log->str) {
+   memset(debugfs_log->str, 0, INIT_LOG_ALLOC_SIZE);
+   data->log_alloc_size = INIT_LOG_ALLOC_SIZE;
+   } else {
+   err = -ENOMEM;
+   }
+   spin_unlock(&data->log_lock);
+
+   return err;
+}
+
+/* free a log corresponding to a fs volume */
+static void hot_debugfs_log_exit(struct debugfs_vol_data *data)
+{
+   struct lstring *debugfs_log = data->debugfs_log;
+
+   

[RFC v2 07/10] vfs: fork one kthread to update data temperature

2012-09-23 Thread zwu . kernel
From: Zhi Yong Wu 

  Fork and run one kernel kthread to calculate
that temperature based on some metrics kept
in custom frequency data structs, and store
the info in the hash table.

Signed-off-by: Zhi Yong Wu 
---
 fs/hot_tracking.c|  467 +-
 fs/hot_tracking.h|   78 +++
 include/linux/hot_tracking.h |3 +
 3 files changed, 542 insertions(+), 6 deletions(-)

diff --git a/fs/hot_tracking.c b/fs/hot_tracking.c
index 5f96442..fd11695 100644
--- a/fs/hot_tracking.c
+++ b/fs/hot_tracking.c
@@ -17,6 +17,8 @@
 #include 
 #include 
 #include 
+#include 
+#include 
 #include 
 #include 
 #include 
@@ -27,7 +29,12 @@ static struct kmem_cache *hot_inode_item_cache;
 static struct kmem_cache *hot_range_item_cache;
 static struct kmem_cache *hot_hash_node_cache;
 
+static struct task_struct *hot_track_temperature_update_kthread;
+
 static void hot_hash_node_init(void *_node);
+static int hot_hash_is_aging(struct hot_freq_data *freq_data);
+static void hot_hash_update_hash_table(struct hot_freq_data *freq_data,
+struct hot_info *root);
 
 /*
  * Initialize the inode tree. Should be called for each new inode
@@ -456,9 +463,13 @@ static struct hot_inode_item 
*hot_rb_update_inode_freq(struct inode *inode,
write_unlock(&hitree->lock);
}
 
-   spin_lock(&he->lock);
-   hot_rb_update_freq(&he->hot_freq_data, rw);
-   spin_unlock(&he->lock);
+   if (!hot_track_temperature_update_kthread
+   || hot_track_temperature_update_kthread->pid != current->pid) {
+   spin_lock(&he->lock);
+   hot_rb_update_freq(&he->hot_freq_data, rw);
+   spin_unlock(&he->lock);
+   hot_hash_update_hash_table(&he->hot_freq_data, root);
+   }
 
 out:
return he;
@@ -505,9 +516,14 @@ static bool hot_rb_update_range_freq(struct hot_inode_item 
*he,
write_unlock(&hrtree->lock);
}
 
-   spin_lock(&hr->lock);
-   hot_rb_update_freq(&hr->hot_freq_data, rw);
-   spin_unlock(&hr->lock);
+   if (!hot_track_temperature_update_kthread
+   || hot_track_temperature_update_kthread->pid != 
current->pid) {
+   spin_lock(&hr->lock);
+   hot_rb_update_freq(&hr->hot_freq_data, rw);
+   spin_unlock(&hr->lock);
+   hot_hash_update_hash_table(&hr->hot_freq_data, root);
+   }
+
hot_rb_free_hot_range_item(hr);
}
 
@@ -515,6 +531,58 @@ out:
return ret;
 }
 
+/* Walk the hot_inode_tree, locking as necessary */
+static struct hot_inode_item
+*hot_rb_find_next_hot_inode(struct hot_info *root,
+   u64 objectid)
+{
+   struct rb_node *node;
+   struct rb_node *prev;
+   struct hot_inode_item *entry;
+
+   read_lock(&root->hot_inode_tree.lock);
+
+   node = root->hot_inode_tree.map.rb_node;
+   prev = NULL;
+   while (node) {
+   prev = node;
+   entry = rb_entry(node, struct hot_inode_item, rb_node);
+
+   if (objectid < entry->i_ino)
+   node = node->rb_left;
+   else if (objectid > entry->i_ino)
+   node = node->rb_right;
+   else
+   break;
+   }
+
+   if (!node) {
+   while (prev) {
+   entry = rb_entry(prev, struct hot_inode_item, rb_node);
+   if (objectid <= entry->i_ino) {
+   node = prev;
+   break;
+   }
+   prev = rb_next(prev);
+   }
+   }
+
+   if (node) {
+   entry = rb_entry(node, struct hot_inode_item, rb_node);
+   /*
+ * increase reference count to prevent pruning while
+ * caller is using the hot_inode_item
+ */
+   kref_get(&entry->refs);
+
+   read_unlock(&root->hot_inode_tree.lock);
+   return entry;
+   }
+
+   read_unlock(&root->hot_inode_tree.lock);
+   return NULL;
+}
+
 /* main function to update access frequency from read/writepage(s) hooks */
 void hot_rb_update_freqs(struct inode *inode, u64 start,
u64 len, int rw)
@@ -534,6 +602,65 @@ void hot_rb_update_freqs(struct inode *inode, u64 start,
 }
 
 /*
+ * take hot range that is now cold and remove from indexes and clean up
+ * any memory associted, involves removing hot range from rb tree, and
+ * heat hash lists, and freeing up all memory.
+ */
+static void hot_rb_remove_range_data(struct hot_inode_item *hot_inode,
+   struct hot_range_item *hr,
+   struct hot_info *root)
+{
+   /* remove range from rb tree */
+   hot_rb_remove_hot_range_item(&h

[RFC v2 08/10] vfs: add 3 new ioctl interfaces

2012-09-23 Thread zwu . kernel
From: Zhi Yong Wu 

  FS_IOC_GET_HEAT_INFO: return a struct containing the various
metrics collected in btrfs_freq_data structs, and also return a
calculated data temperature based on those metrics. Optionally, retrieve
the temperature from the hot data hash list instead of recalculating it.

  FS_IOC_GET_HEAT_OPTS: return an integer representing the current
state of hot data tracking and migration:

0 = do nothing
1 = track frequency of access

  FS_IOC_SET_HEAT_OPTS: change the state of hot data tracking and
migration, as described above.

Signed-off-by: Zhi Yong Wu 
---
 fs/compat_ioctl.c|8 +++
 fs/ioctl.c   |  130 ++
 include/linux/fs.h   |   11 
 include/linux/hot_tracking.h |   12 
 4 files changed, 161 insertions(+), 0 deletions(-)

diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c
index debdfe0..a88c7de 100644
--- a/fs/compat_ioctl.c
+++ b/fs/compat_ioctl.c
@@ -1390,6 +1390,11 @@ COMPATIBLE_IOCTL(TIOCSTART)
 COMPATIBLE_IOCTL(TIOCSTOP)
 #endif
 
+/*Hot data tracking*/
+COMPATIBLE_IOCTL(FS_IOC_GET_HEAT_INFO)
+COMPATIBLE_IOCTL(FS_IOC_SET_HEAT_OPTS)
+COMPATIBLE_IOCTL(FS_IOC_GET_HEAT_OPTS)
+
 /* fat 'r' ioctls. These are handled by fat with ->compat_ioctl,
but we don't want warnings on other file systems. So declare
them as compatible here. */
@@ -1572,6 +1577,9 @@ asmlinkage long compat_sys_ioctl(unsigned int fd, 
unsigned int cmd,
case FIBMAP:
case FIGETBSZ:
case FIONREAD:
+   case FS_IOC_GET_HEAT_INFO:
+   case FS_IOC_SET_HEAT_OPTS:
+   case FS_IOC_GET_HEAT_OPTS:
if (S_ISREG(filp->f_path.dentry->d_inode->i_mode))
break;
/*FALL THROUGH*/
diff --git a/fs/ioctl.c b/fs/ioctl.c
index 29167be..394975e 100644
--- a/fs/ioctl.c
+++ b/fs/ioctl.c
@@ -15,6 +15,7 @@
 #include 
 #include 
 #include 
+#include "hot_tracking.h"
 
 #include 
 
@@ -537,6 +538,126 @@ static int ioctl_fsthaw(struct file *filp)
 }
 
 /*
+ * Retrieve information about access frequency for the given file. Return it in
+ * a userspace-friendly struct for btrfsctl (or another tool) to parse.
+ *
+ * The temperature that is returned can be "live" -- that is, recalculated when
+ * the ioctl is called -- or it can be returned from the hashtable, reflecting
+ * the (possibly old) value that the system will use when considering files
+ * for migration. This behavior is determined by hot_heat_info->live.
+ */
+static int ioctl_heat_info(struct file *file, void __user *argp)
+{
+   struct inode *mnt_inode = file->f_path.dentry->d_inode;
+   struct inode *file_inode;
+   struct file *file_filp;
+   struct hot_info *root = &(mnt_inode->i_sb->s_hotinfo);
+   struct hot_heat_info *heat_info;
+   struct hot_inode_tree *hitree;
+   struct hot_inode_item *he;
+   int ret;
+
+   heat_info = kmalloc(sizeof(struct hot_heat_info),
+   GFP_KERNEL | GFP_NOFS);
+
+   if (copy_from_user((void *) heat_info,
+   argp,
+   sizeof(struct hot_heat_info)) != 0) {
+   ret = -EFAULT;
+   goto err;
+   }
+
+   file_filp = filp_open(heat_info->filename, O_RDONLY, 0);
+   file_inode = file_filp->f_dentry->d_inode;
+   filp_close(file_filp, NULL);
+
+   hitree = &root->hot_inode_tree;
+   read_lock(&hitree->lock);
+   he = hot_rb_lookup_hot_inode_item(hitree, file_inode->i_ino);
+   read_unlock(&hitree->lock);
+   if (!he) {
+   /* we don't have any info on this file yet */
+   ret = -ENODATA;
+   goto err;
+   }
+
+   spin_lock(&he->lock);
+   heat_info->avg_delta_reads =
+   (__u64) he->hot_freq_data.avg_delta_reads;
+   heat_info->avg_delta_writes =
+   (__u64) he->hot_freq_data.avg_delta_writes;
+   heat_info->last_read_time =
+   (__u64) 
timespec_to_ns(&he->hot_freq_data.last_read_time);
+   heat_info->last_write_time =
+   (__u64) 
timespec_to_ns(&he->hot_freq_data.last_write_time);
+   heat_info->num_reads =
+   (__u32) he->hot_freq_data.nr_reads;
+   heat_info->num_writes =
+   (__u32) he->hot_freq_data.nr_writes;
+
+   if (heat_info->live > 0) {
+   /* got a request for live temperature,
+* call hot_hash_calc_temperature to recalculate
+*/
+   heat_info->temperature =
+   hot_hash_calc_temperature(&he->hot_freq_data);
+   } else {
+   /* not live temperature, get it from the hashlist */
+   read_lock(&he->heat_node->hlist->rwlock);
+   heat_info->temperature = he->heat_node->hlist->temperature;
+   read_unlock(&he->heat_node->hlist->rwlock);
+   }
+   spin_unlock(&he->lock);
+
+   hot_rb_f

[RFC v2 01/10] vfs: introduce private rb structures

2012-09-23 Thread zwu . kernel
From: Zhi Yong Wu 

  One root structure hot_info is defined, is hooked
up in super_block, and will be used to hold rb trees
root, hash list root and some other information, etc.
  Adds hot_inode_tree struct to keep track of
frequently accessed files, and be keyed by {inode, offset}.
Trees contain hot_inode_items representing those files
and ranges.
  Having these trees means that vfs can quickly determine the
temperature of some data by doing some calculations on the
hot_freq_data struct that hangs off of the tree item.
  Define two items hot_inode_item and hot_range_item,
one of them represents one tracked file
to keep track of its access frequency and the tree of
ranges in this file, while the latter represents
a file range of one inode.
  Each of the two structures contains a hot_freq_data
struct with its frequency of access metrics (number of
{reads, writes}, last {read,write} time, frequency of
{reads,writes}).
  Also, each hot_inode_item contains one hot_range_tree
struct which is keyed by {inode, offset, length}
and used to keep track of all the ranges in this file.

Signed-off-by: Zhi Yong Wu 
---
 fs/Makefile  |2 +-
 fs/dcache.c  |2 +
 fs/hot_tracking.c|  116 ++
 fs/hot_tracking.h|   27 ++
 include/linux/fs.h   |4 ++
 include/linux/hot_tracking.h |   96 ++
 6 files changed, 246 insertions(+), 1 deletions(-)
 create mode 100644 fs/hot_tracking.c
 create mode 100644 fs/hot_tracking.h
 create mode 100644 include/linux/hot_tracking.h

diff --git a/fs/Makefile b/fs/Makefile
index 2fb9779..9d29618 100644
--- a/fs/Makefile
+++ b/fs/Makefile
@@ -11,7 +11,7 @@ obj-y :=  open.o read_write.o file_table.o super.o \
attr.o bad_inode.o file.o filesystems.o namespace.o \
seq_file.o xattr.o libfs.o fs-writeback.o \
pnode.o drop_caches.o splice.o sync.o utimes.o \
-   stack.o fs_struct.o statfs.o
+   stack.o fs_struct.o statfs.o hot_tracking.o
 
 ifeq ($(CONFIG_BLOCK),y)
 obj-y +=   buffer.o bio.o block_dev.o direct-io.o mpage.o ioprio.o
diff --git a/fs/dcache.c b/fs/dcache.c
index 8086636..92470a1 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -37,6 +37,7 @@
 #include 
 #include 
 #include 
+#include "hot_tracking.h"
 #include "internal.h"
 #include "mount.h"
 
@@ -3164,6 +3165,7 @@ void __init vfs_caches_init(unsigned long mempages)
inode_init();
files_init(mempages);
mnt_init();
+   hot_track_cache_init();
bdev_cache_init();
chrdev_init();
 }
diff --git a/fs/hot_tracking.c b/fs/hot_tracking.c
new file mode 100644
index 000..173054b
--- /dev/null
+++ b/fs/hot_tracking.c
@@ -0,0 +1,116 @@
+/*
+ * fs/hot_tracking.c
+ *
+ * Copyright (C) 2012 IBM Corp. All rights reserved.
+ * Written by Zhi Yong Wu 
+ *Ben Chociej 
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ */
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include "hot_tracking.h"
+
+/* kmem_cache pointers for slab caches */
+static struct kmem_cache *hot_inode_item_cache;
+static struct kmem_cache *hot_range_item_cache;
+
+/*
+ * Initialize the inode tree. Should be called for each new inode
+ * access or other user of the hot_inode interface.
+ */
+static void hot_rb_inode_tree_init(struct hot_inode_tree *tree)
+{
+   tree->map = RB_ROOT;
+   rwlock_init(&tree->lock);
+}
+
+/*
+ * Initialize the hot range tree. Should be called for each new inode
+ * access or other user of the hot_range interface.
+ */
+void hot_rb_range_tree_init(struct hot_range_tree *tree)
+{
+   tree->map = RB_ROOT;
+   rwlock_init(&tree->lock);
+}
+
+/*
+ * Initialize a new hot_inode_item structure. The new structure is
+ * returned with a reference count of one and needs to be
+ * freed using free_inode_item()
+ */
+void hot_rb_inode_item_init(void *_item)
+{
+   struct hot_inode_item *he = _item;
+
+   memset(he, 0, sizeof(*he));
+   kref_init(&he->refs);
+   spin_lock_init(&he->lock);
+   he->hot_freq_data.avg_delta_reads = (u64) -1;
+   he->hot_freq_data.avg_delta_writes = (u64) -1;
+   he->hot_freq_data.flags = FREQ_DATA_TYPE_INODE;
+   hot_rb_range_tree_init(&he->hot_range_tree);
+}
+
+/*
+ * Initialize a new hot_range_item structure. The new structure is
+ * returned with a reference count of one and needs to be
+ * freed using free_range_item()
+ */
+static void hot_rb_range_item_init(void *_item)
+{
+   struct hot_range_item *hr = _item;
+
+   memset(hr, 0, sizeof(*hr));
+   kref_init(&hr->refs);
+   spin_lock_init(&hr->lock);
+   hr->hot_freq_data.avg_delta_reads = (u64) -1;
+   hr->hot_freq_data.avg_delta_writes = (u64)

[RFC v2 00/10] vfs: hot data tracking

2012-09-23 Thread zwu . kernel
From: Zhi Yong Wu 

NOTE:

  The patchset is currently post out mainly to make sure
it is going in the correct direction and hope to get some
helpful comments from other guys.
  For more infomation, please check hot_tracking.txt in Documentation

TODO List:

 1.) Need to do scalability or performance tests.
 2.) Turn some Micro into be tunable
   TIME_TO_KICK, and HEAT_UPDATE_DELAY
 3.) Rafactor hot_hash_is_aging()
   If you just made the timeout value a timespec and compared
 the _timespecs_, you would be doing a lot fewer conversions.
 4.) Cleanup some unnecessary lock protect
 5.) Add more comments to explain how to calc temperature
   How to "read" the avg read/write time (nanoseconds,
 microseconds, jiffies??)
 6.) Make updating tempreture more parallel
 7.) How to save the file tempreture among the umount to be able to
 preserve the file tempreture after reboot
 8.) Add one new ioctl inteface to set temperature value.

Ben Chociej, Matt Lupfer and Conor Scott originally wrote this code to
 be very btrfs-specific.  I've taken their code and attempted to
make it more generic and integrate it at the VFS level.

Changelog from v1:
 1.) Reduce new files and put all in fs/hot_tracking.[ch] [Dave Chinner]
 2.) The first three patches can probably just be flattened into one.
[Marco Stornelli , Dave Chinner]

Zhi Yong Wu (10):
  vfs: introduce private rb structures
  vfs: add support for updating access frequency
  vfs: add one new mount option '-o hottrack'
  vfs: add init and exit support
  vfs: introduce one hash table
  vfs: enable hot data tracking
  vfs: fork one kthread to update data temperature
  vfs: add 3 new ioctl interfaces
  vfs: add debugfs support
  vfs: add documentation

 Documentation/filesystems/hot_tracking.txt |  106 ++
 fs/Makefile|2 +-
 fs/compat_ioctl.c  |8 +
 fs/dcache.c|2 +
 fs/direct-io.c |   10 +
 fs/hot_tracking.c  | 1563 
 fs/hot_tracking.h  |  163 +++
 fs/ioctl.c |  130 +++
 fs/namespace.c |   10 +
 fs/super.c |   11 +
 include/linux/fs.h |   15 +
 include/linux/hot_tracking.h   |  164 +++
 mm/filemap.c   |8 +
 mm/page-writeback.c|   21 +
 mm/readahead.c |9 +
 15 files changed, 2221 insertions(+), 1 deletions(-)
 create mode 100644 Documentation/filesystems/hot_tracking.txt
 create mode 100644 fs/hot_tracking.c
 create mode 100644 fs/hot_tracking.h
 create mode 100644 include/linux/hot_tracking.h

-- 
1.7.6.5

--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH V3 1/2] Btrfs: cleanup duplicated division functions

2012-09-23 Thread Ilya Dryomov
On Fri, Sep 21, 2012 at 05:07:46PM +0800, Miao Xie wrote:
> div_factor{_fine} has been implemented for two times, and these two functions
> are very similar, so cleanup the reduplicate implement and drop the original
> div_factor(), and then rename div_factor_fine() to div_factor(). So the 
> divisor
> of the new div_factor() is 100, not 10.
> 
> And I move div_factor into a independent file named math.h because it is a
> common math function, may be used by every composition of btrfs.
> 
> Because these functions are mostly used on the hot path, and we are sure
> the parameters are right in the most cases, we don't add complex checks
> for the parameters. But in the other place, we must check and make sure
> the parameters are right. So besides the code cleanup, this patch also
> add a check for the usage of the space balance, it is the only place that
> we need add check to make sure the parameters of div_factor are right till 
> now.
> 
> Signed-off-by: Miao Xie 
> ---
> Changelog v2 -> v3:
> - drop the original div_factor and rename div_factor_fine to div_factor
> - drop the check of the factor
> 
> Changelog v1 -> v2:
> - add missing check
> ---
>  fs/btrfs/extent-tree.c |   29 ++---
>  fs/btrfs/ioctl.c   |   18 ++
>  fs/btrfs/math.h|   33 +
>  fs/btrfs/relocation.c  |2 +-
>  fs/btrfs/transaction.c |2 +-
>  fs/btrfs/volumes.c |   30 +-
>  6 files changed, 64 insertions(+), 50 deletions(-)
>  create mode 100644 fs/btrfs/math.h
> 
> diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
> index a010234..bcb9ced 100644
> --- a/fs/btrfs/extent-tree.c
> +++ b/fs/btrfs/extent-tree.c
> @@ -33,6 +33,7 @@
>  #include "volumes.h"
>  #include "locking.h"
>  #include "free-space-cache.h"
> +#include "math.h"
>  
>  #undef SCRAMBLE_DELAYED_REFS
>  
> @@ -648,24 +649,6 @@ void btrfs_clear_space_info_full(struct btrfs_fs_info 
> *info)
>   rcu_read_unlock();
>  }
>  
> -static u64 div_factor(u64 num, int factor)
> -{
> - if (factor == 10)
> - return num;
> - num *= factor;
> - do_div(num, 10);
> - return num;
> -}
> -
> -static u64 div_factor_fine(u64 num, int factor)
> -{
> - if (factor == 100)
> - return num;
> - num *= factor;
> - do_div(num, 100);
> - return num;
> -}
> -
>  u64 btrfs_find_block_group(struct btrfs_root *root,
>  u64 search_start, u64 search_hint, int owner)
>  {
> @@ -674,7 +657,7 @@ u64 btrfs_find_block_group(struct btrfs_root *root,
>   u64 last = max(search_hint, search_start);
>   u64 group_start = 0;
>   int full_search = 0;
> - int factor = 9;
> + int factor = 90;
>   int wrapped = 0;
>  again:
>   while (1) {
> @@ -708,7 +691,7 @@ again:
>   if (!full_search && factor < 10) {
>   last = search_start;
>   full_search = 1;
> - factor = 10;
> + factor = 100;
>   goto again;
>   }
>  found:
> @@ -3513,7 +3496,7 @@ static int should_alloc_chunk(struct btrfs_root *root,
>   if (force == CHUNK_ALLOC_LIMITED) {
>   thresh = btrfs_super_total_bytes(root->fs_info->super_copy);
>   thresh = max_t(u64, 64 * 1024 * 1024,
> -div_factor_fine(thresh, 1));
> +div_factor(thresh, 1));
>  
>   if (num_bytes - num_allocated < thresh)
>   return 1;
> @@ -3521,12 +3504,12 @@ static int should_alloc_chunk(struct btrfs_root *root,
>   thresh = btrfs_super_total_bytes(root->fs_info->super_copy);
>  
>   /* 256MB or 2% of the FS */
> - thresh = max_t(u64, 256 * 1024 * 1024, div_factor_fine(thresh, 2));
> + thresh = max_t(u64, 256 * 1024 * 1024, div_factor(thresh, 2));
>   /* system chunks need a much small threshold */
>   if (sinfo->flags & BTRFS_BLOCK_GROUP_SYSTEM)
>   thresh = 32 * 1024 * 1024;
>  
> - if (num_bytes > thresh && sinfo->bytes_used < div_factor(num_bytes, 8))
> + if (num_bytes > thresh && sinfo->bytes_used < div_factor(num_bytes, 80))
>   return 0;
>   return 1;
>  }
> diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
> index 9384a2a..d8d53f7 100644
> --- a/fs/btrfs/ioctl.c
> +++ b/fs/btrfs/ioctl.c
> @@ -3335,6 +3335,24 @@ static long btrfs_ioctl_balance(struct file *file, 
> void __user *arg)
>  
>   goto do_balance;
>   }
> +
> + if ((bargs->data.flags & BTRFS_BALANCE_ARGS_USAGE) &&
> + (bargs->data.usage < 0 || bargs->data.usage > 100)) {
> + ret = -EINVAL;
> + goto out_bargs;
> + }
> +
> + if ((bargs->meta.flags & BTRFS_BALANCE_ARGS_USAGE) &&
> + (bargs->meta.usage < 0 || bargs->meta.usage > 100)) {
> + ret = -EINVAL;
> + goto out_bargs;
> + 

Re: [PATCH 2/5] Btrfs: fix trans block rsv regression

2012-09-23 Thread Miao Xie
On fri, 14 Sep 2012 21:05:01 +0800, Liu Bo wrote:
> On 09/14/2012 09:01 PM, Liu Bo wrote:
>> On 09/14/2012 08:41 PM, Josef Bacik wrote:
>>> On Fri, Sep 14, 2012 at 02:58:04AM -0600, Liu Bo wrote:
 In some workloads we have nested joining transaction operations,
 eg.
   run_delalloc_nocow
   btrfs_join_transaction
   cow_file_range
  btrfs_join_transaction

 it can be a serious bug since each trans handler has only two
 block_rsv, orig_rsv and block_rsv, which means we may lose our
 first block_rsv after two joining transaction operations:

 1) btrfs_start_transaction
   trans->block_rsv = A

 2) btrfs_join_transaction
   trans->orig_rsv = trans->block_rsv; ---> orig_rsv is now A
   trans->block_rsv = B

 3) btrfs_join_transaction
   trans->orig_rsv = trans->block_rsv; ---> orig_rsv is now B
   trans->block_rsv = C
...

>>>
>>> I'd like to see the actual stack trace where this happens, because I don't 
>>> think
>>> it can happen.  And if it is we need to look at that specific case and 
>>> adjust it
>>> as necessary and not add a bunch of kmallocs just to track the block_rsv,
>>> because frankly it's not that big of a deal, it was just put into place in 
>>> case
>>> somebody wasn't expecting a call they made to start another transaction and
>>> reset the block_rsv, which I don't actually think happens anywhere.  So NAK 
>>> on
>>> this patch, give me more information so I can figure out the right way to 
>>> deal
>>> with this.  Thanks,
>>>
>>
>> Fine, please run xfstests 068 till it hits a BUG_ON inside either 
>> btrfs_delete_delayed_dir_index or
>> btrfs_insert_delayed_dir_index.
>>
>> What I saw is that the orig_rsv and block_rsv is both delalloc_block_rsv, 
>> which is already lack of space.
>>
> 
> and trans->use_count has been 3.

Hi, Liu

Do you still look into this problem?  I think the following patch can help you.
This patch was made to improve btrfs_run_ordered_operations(), I found it can 
fix
the problem that you pointed out in this mail.

Thanks
Miao

Subject: [PATCH] Btrfs: make ordered operations be handled by multi-thread

Signed-off-by: Miao Xie 
---
 fs/btrfs/ctree.h|1 +
 fs/btrfs/disk-io.c  |7 
 fs/btrfs/ordered-data.c |   88 ++
 fs/btrfs/ordered-data.h |2 +-
 fs/btrfs/transaction.c  |   18 +++--
 5 files changed, 95 insertions(+), 21 deletions(-)

diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index dbb461f..fd7ed9f 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -1328,6 +1328,7 @@ struct btrfs_fs_info {
 */
struct btrfs_workers fixup_workers;
struct btrfs_workers delayed_workers;
+   struct btrfs_workers ordered_operation_workers;
struct task_struct *transaction_kthread;
struct task_struct *cleaner_kthread;
int thread_pool_size;
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 7fb7069..e49665f 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -2317,6 +2317,10 @@ int open_ctree(struct super_block *sb,
btrfs_init_workers(&fs_info->readahead_workers, "readahead",
   fs_info->thread_pool_size,
   &fs_info->generic_worker);
+   btrfs_init_workers(&fs_info->ordered_operation_workers,
+  "ordered-operations",
+  fs_info->thread_pool_size,
+  &fs_info->generic_worker);
 
/*
 * endios are largely parallel and should have a very
@@ -2346,6 +2350,7 @@ int open_ctree(struct super_block *sb,
ret |= btrfs_start_workers(&fs_info->delayed_workers);
ret |= btrfs_start_workers(&fs_info->caching_workers);
ret |= btrfs_start_workers(&fs_info->readahead_workers);
+   ret |= btrfs_start_workers(&fs_info->ordered_operation_workers);
if (ret) {
err = -ENOMEM;
goto fail_sb_buffer;
@@ -2649,6 +2654,7 @@ fail_tree_roots:
 
 fail_sb_buffer:
btrfs_stop_workers(&fs_info->generic_worker);
+   btrfs_stop_workers(&fs_info->ordered_operation_workers);
btrfs_stop_workers(&fs_info->readahead_workers);
btrfs_stop_workers(&fs_info->fixup_workers);
btrfs_stop_workers(&fs_info->delalloc_workers);
@@ -3256,6 +3262,7 @@ int close_ctree(struct btrfs_root *root)
btrfs_stop_workers(&fs_info->delayed_workers);
btrfs_stop_workers(&fs_info->caching_workers);
btrfs_stop_workers(&fs_info->readahead_workers);
+   btrfs_stop_workers(&fs_info->ordered_operation_workers);
 
 #ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY
if (btrfs_test_opt(root, CHECK_INTEGRITY))
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c
index 4ae1014..a4b1316 100644
--- a/fs/btrfs/ordered-data.c
+++ b/fs/btrfs/ordered-data.c
@@ -26,6 +26,7 @@
 #include "extent_io.h"
 
 static struct kmem_

Re: [PATCH V3 1/2] Btrfs: cleanup duplicated division functions

2012-09-23 Thread Miao Xie
On Fri, 21 Sep 2012 17:24:44 +0200, David Sterba wrote:
> On Fri, Sep 21, 2012 at 05:07:46PM +0800, Miao Xie wrote:
>> --- a/fs/btrfs/ioctl.c
>> +++ b/fs/btrfs/ioctl.c
>> @@ -3335,6 +3335,24 @@ static long btrfs_ioctl_balance(struct file *file, 
>> void __user *arg)
>>  
>>  goto do_balance;
>>  }
>> +
>> +if ((bargs->data.flags & BTRFS_BALANCE_ARGS_USAGE) &&
>> +(bargs->data.usage < 0 || bargs->data.usage > 100)) {
> 
> the 0 checks belong here
> 
>> +ret = -EINVAL;
>> +goto out_bargs;
>> +}
>> +
>> +if ((bargs->meta.flags & BTRFS_BALANCE_ARGS_USAGE) &&
>> +(bargs->meta.usage < 0 || bargs->meta.usage > 100)) {
>> +ret = -EINVAL;
>> +goto out_bargs;
>> +}
>> +
>> +if ((bargs->sys.flags & BTRFS_BALANCE_ARGS_USAGE) &&
>> +(bargs->sys.usage < 0 || bargs->sys.usage > 100)) {
>> +ret = -EINVAL;
>> +goto out_bargs;
>> +}
>>  } else {
>>  bargs = NULL;
>>  }
>> @@ -2347,7 +2335,8 @@ static int chunk_usage_filter(struct btrfs_fs_info 
>> *fs_info, u64 chunk_offset,
>>  cache = btrfs_lookup_block_group(fs_info, chunk_offset);
>>  chunk_used = btrfs_block_group_used(&cache->item);
>>  
>> -user_thresh = div_factor_fine(cache->key.offset, bargs->usage);
>> +BUG_ON(bargs->usage < 0 || bargs->usage > 100);
> 
> otherwise it reliably crashes here

Sorry, I don't know why it will crash here if we input 0. I tried to input 0,
and it worked well.

I think the only case we must take into account is the users might input the 
wrong value (>100 or <0)
on the old kernel, and it can be stored into the filesystem. If we mount this 
filesystem
on the new kernel, some problems may happen.

Thanks
Miao
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html