[PATCH 3/4] btrfs-progs: change seen_fsid to hold fd and DIR*

2017-09-25 Thread Misono, Tomohiro
Change seen_fsid to hold fd and DIR* in order to keep access to each fs.
This will be used for 'subvol delete --commit-after'.

Signed-off-by: Tomohiro Misono 
---
 cmds-filesystem.c | 4 ++--
 utils.c   | 6 +-
 utils.h   | 5 -
 3 files changed, 11 insertions(+), 4 deletions(-)

diff --git a/cmds-filesystem.c b/cmds-filesystem.c
index c7dae40..4bbff43 100644
--- a/cmds-filesystem.c
+++ b/cmds-filesystem.c
@@ -277,7 +277,7 @@ static void print_one_uuid(struct btrfs_fs_devices 
*fs_devices,
u64 devs_found = 0;
u64 total;
 
-   if (add_seen_fsid(fs_devices->fsid, seen_fsid_hash))
+   if (add_seen_fsid(fs_devices->fsid, seen_fsid_hash, -1, NULL))
return;
 
uuid_unparse(fs_devices->fsid, uuidbuf);
@@ -324,7 +324,7 @@ static int print_one_fs(struct btrfs_ioctl_fs_info_args 
*fs_info,
struct btrfs_ioctl_dev_info_args *tmp_dev_info;
int ret;
 
-   ret = add_seen_fsid(fs_info->fsid, seen_fsid_hash);
+   ret = add_seen_fsid(fs_info->fsid, seen_fsid_hash, -1, NULL);
if (ret == -EEXIST)
return 0;
else if (ret)
diff --git a/utils.c b/utils.c
index f91d41e..bdfbfe0 100644
--- a/utils.c
+++ b/utils.c
@@ -1804,7 +1804,8 @@ int is_seen_fsid(u8 *fsid, struct seen_fsid 
*seen_fsid_hash[])
return 0;
 }
 
-int add_seen_fsid(u8 *fsid, struct seen_fsid *seen_fsid_hash[])
+int add_seen_fsid(u8 *fsid, struct seen_fsid *seen_fsid_hash[],
+   int fd, DIR *dirstream)
 {
u8 hash = fsid[0];
int slot = hash % SEEN_FSID_HASH_SIZE;
@@ -1832,6 +1833,8 @@ insert:
 
alloc->next = NULL;
memcpy(alloc->fsid, fsid, BTRFS_FSID_SIZE);
+   alloc->fd = fd;
+   alloc->dirstream = dirstream;
 
if (seen)
seen->next = alloc;
@@ -1851,6 +1854,7 @@ void free_seen_fsid(struct seen_fsid *seen_fsid_hash[])
seen = seen_fsid_hash[slot];
while (seen) {
next = seen->next;
+   close_file_or_dir(seen->fd, seen->dirstream);
free(seen);
seen = next;
}
diff --git a/utils.h b/utils.h
index da34e6c..bac7688 100644
--- a/utils.h
+++ b/utils.h
@@ -107,9 +107,12 @@ int get_fsid(const char *path, u8 *fsid, int silent);
 struct seen_fsid {
u8 fsid[BTRFS_FSID_SIZE];
struct seen_fsid *next;
+   int fd;
+   DIR *dirstream;
 };
 int is_seen_fsid(u8 *fsid, struct seen_fsid *seen_fsid_hash[]);
-int add_seen_fsid(u8 *fsid, struct seen_fsid *seen_fsid_hash[]);
+int add_seen_fsid(u8 *fsid, struct seen_fsid *seen_fsid_hash[],
+   int fd, DIR *dirstream);
 void free_seen_fsid(struct seen_fsid *seen_fsid_hash[]);
 
 int get_label(const char *btrfs_dev, char *label);
-- 
2.9.5

--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 4/4] btrfs-progs: subvol: fix subvol del --commit-after

2017-09-25 Thread Misono, Tomohiro
Fix subvol del --commit-after to work properly:
 - SYNC ioctl will be issued even when last delete is failed
 - SYNC ioctl will be issued on each file system only once in the end

To achieve this, get_fsid() and add_seen_fsid() is called after each delete
to keep only one fd for each fs.

In the end, seen_fsid_hash will be traversed and SYNC is issued on each fs.

Signed-off-by: Tomohiro Misono 
---
 cmds-subvolume.c | 77 
 1 file changed, 56 insertions(+), 21 deletions(-)

diff --git a/cmds-subvolume.c b/cmds-subvolume.c
index 666f6e0..bcbd737 100644
--- a/cmds-subvolume.c
+++ b/cmds-subvolume.c
@@ -263,6 +263,10 @@ static int cmd_subvol_delete(int argc, char **argv)
DIR *dirstream = NULL;
int verbose = 0;
int commit_mode = 0;
+   u8 fsid[BTRFS_FSID_SIZE];
+   char uuidbuf[BTRFS_UUID_UNPARSED_SIZE];
+   struct seen_fsid *seen_fsid_hash[SEEN_FSID_HASH_SIZE] = {NULL,};
+   enum { COMMIT_AFTER = 1, COMMIT_EACH = 2 };
 
while (1) {
int c;
@@ -279,10 +283,10 @@ static int cmd_subvol_delete(int argc, char **argv)
 
switch(c) {
case 'c':
-   commit_mode = 1;
+   commit_mode = COMMIT_AFTER;
break;
case 'C':
-   commit_mode = 2;
+   commit_mode = COMMIT_EACH;
break;
case 'v':
verbose++;
@@ -298,7 +302,7 @@ static int cmd_subvol_delete(int argc, char **argv)
if (verbose > 0) {
printf("Transaction commit: %s\n",
!commit_mode ? "none (default)" :
-   commit_mode == 1 ? "at the end" : "after each");
+   commit_mode == COMMIT_AFTER ? "at the end" : "after 
each");
}
 
cnt = optind;
@@ -338,50 +342,81 @@ again:
}
 
printf("Delete subvolume (%s): '%s/%s'\n",
-   commit_mode == 2 || (commit_mode == 1 && cnt + 1 == argc)
-   ? "commit" : "no-commit", dname, vname);
+   commit_mode == COMMIT_EACH ? "commit" : "no-commit", dname, 
vname);
memset(, 0, sizeof(args));
strncpy_null(args.name, vname);
res = ioctl(fd, BTRFS_IOC_SNAP_DESTROY, );
-   if(res < 0 ){
+   if (res < 0) {
error("cannot delete '%s/%s': %s", dname, vname,
strerror(errno));
ret = 1;
goto out;
}
 
-   if (commit_mode == 1) {
+   if (commit_mode == COMMIT_EACH) {
res = wait_for_commit(fd);
if (res < 0) {
-   error("unable to wait for commit after '%s': %s",
+   error("unable to wait for commit after deleting '%s': 
%s",
path, strerror(errno));
ret = 1;
}
+   } else if (commit_mode == COMMIT_AFTER) {
+   res = get_fsid(dname, fsid, 0);
+   if (res < 0) {
+   error("unable to get fsid for '%s': %s", path, 
strerror(-res));
+   error("delete suceeded but commit may not be done in 
the end");
+   ret = 1;
+   goto out;
+   }
+
+   if (add_seen_fsid(fsid, seen_fsid_hash, fd, dirstream) == 0) {
+   if (verbose > 0) {
+   uuid_unparse(fsid, uuidbuf);
+   printf("  new fs is found for '%s', fsid: 
%s\n", path, uuidbuf);
+   }
+   // this is the first time a subvolume on this 
filesystem is deleted
+   // keep fd in order to issue SYNC ioctl in the end
+   goto keep_fd;
+   }
}
 
 out:
+   close_file_or_dir(fd, dirstream);
+keep_fd:
+   fd = -1;
+   dirstream = NULL;
free(dupdname);
free(dupvname);
dupdname = NULL;
dupvname = NULL;
cnt++;
-   if (cnt < argc) {
-   close_file_or_dir(fd, dirstream);
-   /* avoid double free */
-   fd = -1;
-   dirstream = NULL;
+   if (cnt < argc)
goto again;
-   }
 
-   if (commit_mode == 2 && fd != -1) {
-   res = wait_for_commit(fd);
-   if (res < 0) {
-   error("unable to do final sync after deletion: %s",
-   strerror(errno));
-   ret = 1;
+   if (commit_mode == COMMIT_AFTER) {
+   // traverse seen_fsid_hash and issue SYNC ioctl on each 
filesystem
+   int slot;
+   struct seen_fsid *seen;
+
+   for (slot = 0; slot < SEEN_FSID_HASH_SIZE; slot++) {
+   seen = 

[PATCH 2/4] btrfs-progs: move seen_fsid to util.c

2017-09-25 Thread Misono, Tomohiro
Make is_seen_fsid()/add_seen_fsid()/free_seen_fsid() to common functions.
This will be used for 'subvol delete --commit-after'.

Signed-off-by: Tomohiro Misono 
---
 cmds-filesystem.c | 88 ---
 utils.c   | 70 +++
 utils.h   | 11 +++
 3 files changed, 86 insertions(+), 83 deletions(-)

diff --git a/cmds-filesystem.c b/cmds-filesystem.c
index 018857c..c7dae40 100644
--- a/cmds-filesystem.c
+++ b/cmds-filesystem.c
@@ -30,7 +30,6 @@
 
 #include "kerncompat.h"
 #include "ctree.h"
-#include "ioctl.h"
 #include "utils.h"
 #include "volumes.h"
 #include "commands.h"
@@ -43,85 +42,8 @@
  * for btrfs fi show, we maintain a hash of fsids we've already printed.
  * This way we don't print dups if a given FS is mounted more than once.
  */
-#define SEEN_FSID_HASH_SIZE 256
-
-struct seen_fsid {
-   u8 fsid[BTRFS_FSID_SIZE];
-   struct seen_fsid *next;
-};
-
 static struct seen_fsid *seen_fsid_hash[SEEN_FSID_HASH_SIZE] = {NULL,};
 
-static int is_seen_fsid(u8 *fsid)
-{
-   u8 hash = fsid[0];
-   int slot = hash % SEEN_FSID_HASH_SIZE;
-   struct seen_fsid *seen = seen_fsid_hash[slot];
-
-   while (seen) {
-   if (memcmp(seen->fsid, fsid, BTRFS_FSID_SIZE) == 0)
-   return 1;
-
-   seen = seen->next;
-   }
-
-   return 0;
-}
-
-static int add_seen_fsid(u8 *fsid)
-{
-   u8 hash = fsid[0];
-   int slot = hash % SEEN_FSID_HASH_SIZE;
-   struct seen_fsid *seen = seen_fsid_hash[slot];
-   struct seen_fsid *alloc;
-
-   if (!seen)
-   goto insert;
-
-   while (1) {
-   if (memcmp(seen->fsid, fsid, BTRFS_FSID_SIZE) == 0)
-   return -EEXIST;
-
-   if (!seen->next)
-   break;
-
-   seen = seen->next;
-   }
-
-insert:
-
-   alloc = malloc(sizeof(*alloc));
-   if (!alloc)
-   return -ENOMEM;
-
-   alloc->next = NULL;
-   memcpy(alloc->fsid, fsid, BTRFS_FSID_SIZE);
-
-   if (seen)
-   seen->next = alloc;
-   else
-   seen_fsid_hash[slot] = alloc;
-
-   return 0;
-}
-
-static void free_seen_fsid(void)
-{
-   int slot;
-   struct seen_fsid *seen;
-   struct seen_fsid *next;
-
-   for (slot = 0; slot < SEEN_FSID_HASH_SIZE; slot++) {
-   seen = seen_fsid_hash[slot];
-   while (seen) {
-   next = seen->next;
-   free(seen);
-   seen = next;
-   }
-   seen_fsid_hash[slot] = NULL;
-   }
-}
-
 static const char * const filesystem_cmd_group_usage[] = {
"btrfs filesystem []  []",
NULL
@@ -355,7 +277,7 @@ static void print_one_uuid(struct btrfs_fs_devices 
*fs_devices,
u64 devs_found = 0;
u64 total;
 
-   if (add_seen_fsid(fs_devices->fsid))
+   if (add_seen_fsid(fs_devices->fsid, seen_fsid_hash))
return;
 
uuid_unparse(fs_devices->fsid, uuidbuf);
@@ -402,7 +324,7 @@ static int print_one_fs(struct btrfs_ioctl_fs_info_args 
*fs_info,
struct btrfs_ioctl_dev_info_args *tmp_dev_info;
int ret;
 
-   ret = add_seen_fsid(fs_info->fsid);
+   ret = add_seen_fsid(fs_info->fsid, seen_fsid_hash);
if (ret == -EEXIST)
return 0;
else if (ret)
@@ -474,7 +396,7 @@ static int btrfs_scan_kernel(void *search, unsigned 
unit_mode)
goto out;
 
/* skip all fs already shown as mounted fs */
-   if (is_seen_fsid(fs_info_arg.fsid))
+   if (is_seen_fsid(fs_info_arg.fsid, seen_fsid_hash))
continue;
 
ret = get_label_mounted(mnt->mnt_dir, label);
@@ -676,7 +598,7 @@ static int search_umounted_fs_uuids(struct list_head 
*all_uuids,
}
 
/* skip all fs already shown as mounted fs */
-   if (is_seen_fsid(cur_fs->fsid))
+   if (is_seen_fsid(cur_fs->fsid, seen_fsid_hash))
continue;
 
fs_copy = calloc(1, sizeof(*fs_copy));
@@ -908,7 +830,7 @@ devs_only:
free_fs_devices(fs_devices);
}
 out:
-   free_seen_fsid();
+   free_seen_fsid(seen_fsid_hash);
return ret;
 }
 
diff --git a/utils.c b/utils.c
index 4a5dc60..f91d41e 100644
--- a/utils.c
+++ b/utils.c
@@ -1788,6 +1788,76 @@ out:
return ret;
 }
 
+int is_seen_fsid(u8 *fsid, struct seen_fsid *seen_fsid_hash[])
+{
+   u8 hash = fsid[0];
+   int slot = hash % SEEN_FSID_HASH_SIZE;
+   struct seen_fsid *seen = seen_fsid_hash[slot];
+
+   while (seen) {
+   if (memcmp(seen->fsid, fsid, BTRFS_FSID_SIZE) == 0)
+   return 1;
+
+   seen = seen->next;
+   }
+
+   return 0;
+}
+
+int 

[PATCH 1/4] btrfs-progs: move get_fsid() to util.c

2017-09-25 Thread Misono, Tomohiro
Make get_fsid() to a common function.
This will be used for 'subvol delete --commit-after'.

Signed-off-by: Tomohiro Misono 
---
 cmds-property.c | 30 --
 utils.c | 31 +++
 utils.h |  1 +
 3 files changed, 32 insertions(+), 30 deletions(-)

diff --git a/cmds-property.c b/cmds-property.c
index 9ae1246..03bafa0 100644
--- a/cmds-property.c
+++ b/cmds-property.c
@@ -48,36 +48,6 @@ static int parse_prop(const char *arg, const struct 
prop_handler *props,
return -1;
 }
 
-static int get_fsid(const char *path, u8 *fsid, int silent)
-{
-   int ret;
-   int fd;
-   struct btrfs_ioctl_fs_info_args args;
-
-   fd = open(path, O_RDONLY);
-   if (fd < 0) {
-   ret = -errno;
-   if (!silent)
-   error("failed to open %s: %s", path,
-   strerror(-ret));
-   goto out;
-   }
-
-   ret = ioctl(fd, BTRFS_IOC_FS_INFO, );
-   if (ret < 0) {
-   ret = -errno;
-   goto out;
-   }
-
-   memcpy(fsid, args.fsid, BTRFS_FSID_SIZE);
-   ret = 0;
-
-out:
-   if (fd != -1)
-   close(fd);
-   return ret;
-}
-
 static int check_btrfs_object(const char *object)
 {
int ret;
diff --git a/utils.c b/utils.c
index 7a2710f..4a5dc60 100644
--- a/utils.c
+++ b/utils.c
@@ -1758,6 +1758,37 @@ out:
return ret;
 }
 
+int get_fsid(const char *path, u8 *fsid, int silent)
+{
+   int ret;
+   int fd;
+   struct btrfs_ioctl_fs_info_args args;
+
+   fd = open(path, O_RDONLY);
+   if (fd < 0) {
+   ret = -errno;
+   if (!silent)
+   error("failed to open %s: %s", path,
+   strerror(-ret));
+   goto out;
+   }
+
+   ret = ioctl(fd, BTRFS_IOC_FS_INFO, );
+   if (ret < 0) {
+   ret = -errno;
+   goto out;
+   }
+
+   memcpy(fsid, args.fsid, BTRFS_FSID_SIZE);
+   ret = 0;
+
+out:
+   if (fd != -1)
+   close(fd);
+   return ret;
+}
+
+
 static int group_profile_devs_min(u64 flag)
 {
switch (flag & BTRFS_BLOCK_GROUP_PROFILE_MASK) {
diff --git a/utils.h b/utils.h
index d28a05a..b3aabe1 100644
--- a/utils.h
+++ b/utils.h
@@ -100,6 +100,7 @@ int open_file_or_dir3(const char *fname, DIR **dirstream, 
int open_flags);
 void close_file_or_dir(int fd, DIR *dirstream);
 int get_fs_info(const char *path, struct btrfs_ioctl_fs_info_args *fi_args,
struct btrfs_ioctl_dev_info_args **di_ret);
+int get_fsid(const char *path, u8 *fsid, int silent);
 int get_label(const char *btrfs_dev, char *label);
 int set_label(const char *btrfs_dev, const char *label);
 
-- 
2.9.5

--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 0/4] btrfs-progs: subvol: fix del --commit-after

2017-09-25 Thread Misono, Tomohiro
Fix subvol del --commit-after to work properly:
 - SYNC ioctl will be issued even when last delete is failed
 - SYNC ioctl will be issued on each file system only once in the end

To achieve this, each deleted subvol's (parent's) fsid is checked each
time. If the fsid is seen for the first time, its fd will be kept in order
to issue SYNC ioctl in the end.

There already exists get_fsid() in cmds-property.c and seen_fsid which
keeps fsid in hush function in cmds-filesystem.c. First three patches
make them to common functions and last one is the main part.

Tomohiro Misono (4):
  btrfs-progs: move get_fsid() to util.c
  btrfs-progs: move seen_fsid to util.c
  btrfs-progs: change seen_fsid to hold fd and DIR*
  btrfs-progs: subvol: fix subvol del --commit-after

 cmds-filesystem.c |  88 +++--
 cmds-property.c   |  30 
 cmds-subvolume.c  |  77 ---
 utils.c   | 105 ++
 utils.h   |  15 
 5 files changed, 181 insertions(+), 134 deletions(-)

-- 
2.9.5

--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: question: behavior of original check

2017-09-25 Thread Qu Wenruo



On 2017年09月26日 11:01, Su Yue wrote:

Hi

During writing patches about check, I found something confusing
about btrfs-progs original check.

Setup:
Version: Btrfs progs v4.13.1

$ make TEST=006\* test-fsck
[TEST]   fsck-tests.sh
[TEST/fsck]   006-bad-root-items

$ cat tests/fsck-tests-results.txt | tail -n  25
### .../btrfs-progs/btrfs check test.img
checking extents
ref mismatch on [15474688 905216] extent item 1, found 4
Backref 15474688 parent 30883840 owner 0 offset 0 num_refs 0 not found 
in extent tree
Incorrect local backref count on 15474688 parent 30883840 owner 0 offset 
0 found 1 wanted 0 back 0x561cdc0b2770
Backref 15474688 parent 31326208 owner 0 offset 0 num_refs 0 not found 
in extent tree
Incorrect local backref count on 15474688 parent 31326208 owner 0 offset 
0 found 1 wanted 0 back 0x561cdc0b5390
Backref 15474688 parent 31817728 owner 0 offset 0 num_refs 0 not found 
in extent tree
Incorrect local backref count on 15474688 parent 31817728 owner 0 offset 
0 found 1 wanted 0 back 0x561cdc0b5330

backpointer mismatch on [15474688 905216]
checking free space cache
checking fs roots
checking csums
checking root refs
Checking filesystem on test.img
UUID: 3857c23d-4219-4600-a636-ac7707dc4ff3
cache and super generation don't match, space cache will be invalidated
found 6291456 bytes used, *no error* found
total csum bytes: 660
total tree bytes: 786432
total fs tree bytes: 688128
total extent tree bytes: 16384
btree space waste bytes: 459860
file data blocks allocated: 35536896
  referenced 25890816


It seems that errors in extent-tree are not important so btrfs-progs 
just returns 0.


It's a bug that ignored the error found in extent tree.
It's exposed quite a long time ago IIRC.



Is this behavior normal(on purpose)?


Definitely not.


I have written a patch to fix it but then test-fsck/006 will fail due to
nonzero value progs returned.


The problem is that btrfs check doesn't repair the bug.

You would better also fix the repair code.

Thanks,
Qu



Thanks,
Su


--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: question: behavior of original check

2017-09-25 Thread Qu Wenruo



On 2017年09月26日 11:01, Su Yue wrote:

Hi

During writing patches about check, I found something confusing
about btrfs-progs original check.

Setup:
Version: Btrfs progs v4.13.1

$ make TEST=006\* test-fsck
[TEST]   fsck-tests.sh
[TEST/fsck]   006-bad-root-items

$ cat tests/fsck-tests-results.txt | tail -n  25
### .../btrfs-progs/btrfs check test.img
checking extents
ref mismatch on [15474688 905216] extent item 1, found 4
Backref 15474688 parent 30883840 owner 0 offset 0 num_refs 0 not found 
in extent tree
Incorrect local backref count on 15474688 parent 30883840 owner 0 offset 
0 found 1 wanted 0 back 0x561cdc0b2770
Backref 15474688 parent 31326208 owner 0 offset 0 num_refs 0 not found 
in extent tree
Incorrect local backref count on 15474688 parent 31326208 owner 0 offset 
0 found 1 wanted 0 back 0x561cdc0b5390
Backref 15474688 parent 31817728 owner 0 offset 0 num_refs 0 not found 
in extent tree
Incorrect local backref count on 15474688 parent 31817728 owner 0 offset 
0 found 1 wanted 0 back 0x561cdc0b5330

backpointer mismatch on [15474688 905216]
checking free space cache
checking fs roots
checking csums
checking root refs
Checking filesystem on test.img
UUID: 3857c23d-4219-4600-a636-ac7707dc4ff3
cache and super generation don't match, space cache will be invalidated
found 6291456 bytes used, *no error* found
total csum bytes: 660
total tree bytes: 786432
total fs tree bytes: 688128
total extent tree bytes: 16384
btree space waste bytes: 459860
file data blocks allocated: 35536896
  referenced 25890816


It seems that errors in extent-tree are not important so btrfs-progs 
just returns 0.


It's a bug that ignored the error found in extent tree.
It's exposed quite a long time ago IIRC.



Is this behavior normal(on purpose)?


Definitely not.


I have written a patch to fix it but then test-fsck/006 will fail due to
nonzero value progs returned.


The problem is that btrfs check doesn't repair the bug.

You would better also fix the repair code.

Thanks,
Qu



Thanks,
Su


--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


question: behavior of original check

2017-09-25 Thread Su Yue

Hi

During writing patches about check, I found something confusing
about btrfs-progs original check.

Setup:
Version: Btrfs progs v4.13.1

$ make TEST=006\* test-fsck
[TEST]   fsck-tests.sh
[TEST/fsck]   006-bad-root-items

$ cat tests/fsck-tests-results.txt | tail -n  25
### .../btrfs-progs/btrfs check test.img
checking extents
ref mismatch on [15474688 905216] extent item 1, found 4
Backref 15474688 parent 30883840 owner 0 offset 0 num_refs 0 not found 
in extent tree
Incorrect local backref count on 15474688 parent 30883840 owner 0 offset 
0 found 1 wanted 0 back 0x561cdc0b2770
Backref 15474688 parent 31326208 owner 0 offset 0 num_refs 0 not found 
in extent tree
Incorrect local backref count on 15474688 parent 31326208 owner 0 offset 
0 found 1 wanted 0 back 0x561cdc0b5390
Backref 15474688 parent 31817728 owner 0 offset 0 num_refs 0 not found 
in extent tree
Incorrect local backref count on 15474688 parent 31817728 owner 0 offset 
0 found 1 wanted 0 back 0x561cdc0b5330

backpointer mismatch on [15474688 905216]
checking free space cache
checking fs roots
checking csums
checking root refs
Checking filesystem on test.img
UUID: 3857c23d-4219-4600-a636-ac7707dc4ff3
cache and super generation don't match, space cache will be invalidated
found 6291456 bytes used, *no error* found
total csum bytes: 660
total tree bytes: 786432
total fs tree bytes: 688128
total extent tree bytes: 16384
btree space waste bytes: 459860
file data blocks allocated: 35536896
 referenced 25890816


It seems that errors in extent-tree are not important so btrfs-progs 
just returns 0.


Is this behavior normal(on purpose)?
I have written a patch to fix it but then test-fsck/006 will fail due to
nonzero value progs returned.

Thanks,
Su


--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] Btrfs: rework outstanding_extents

2017-09-25 Thread Lu Fengqi
On Mon, Sep 25, 2017 at 04:00:09PM +0200, David Sterba wrote:
>On Thu, Aug 24, 2017 at 05:31:15PM +0800, Lu Fengqi wrote:
>> On Thu, Aug 17, 2017 at 01:01:29PM -0400, jo...@toxicpanda.com wrote:
>> >From: Josef Bacik 
>> >
>> >Right now we do a lot of weird hoops around outstanding_extents in order
>> >to keep the extent count consistent.  This is because we logically
>> >transfer the outstanding_extent count from the initial reservation
>> >through the set_delalloc_bits.  This makes it pretty difficult to get a
>> >handle on how and when we need to mess with outstanding_extents.
>> >
>> >Fix this by revamping the rules of how we deal with outstanding_extents.
>> >Now instead everybody that is holding on to a delalloc extent is
>> >required to increase the outstanding extents count for itself.  This
>> >means we'll have something like this
>> >
>> >btrfs_dealloc_reserve_metadata  - outstanding_extents = 1
>> > btrfs_set_delalloc - outstanding_extents = 2
>> >btrfs_release_delalloc_extents  - outstanding_extents = 1
>> >
>> >for an initial file write.  Now take the append write where we extend an
>> >existing delalloc range but still under the maximum extent size
>> >
>> >btrfs_delalloc_reserve_metadata - outstanding_extents = 2
>> >  btrfs_set_delalloc
>> >btrfs_set_bit_hook  - outstanding_extents = 3
>> >btrfs_merge_bit_hook- outstanding_extents = 2
>> >btrfs_release_delalloc_extents  - outstanding_extnets = 1
>> >
>> >In order to make the ordered extent transition we of course must now
>> >make ordered extents carry their own outstanding_extent reservation, so
>> >for cow_file_range we end up with
>> >
>> >btrfs_add_ordered_extent- outstanding_extents = 2
>> >clear_extent_bit- outstanding_extents = 1
>> >btrfs_remove_ordered_extent - outstanding_extents = 0
>> >
>> >This makes all manipulations of outstanding_extents much more explicit.
>> >Every successful call to btrfs_reserve_delalloc_metadata _must_ now be
>> >combined with btrfs_release_delalloc_extents, even in the error case, as
>> >that is the only function that actually modifies the
>> >outstanding_extents counter.
>> 
>> s/btrfs_reserve_delalloc_metadata/btrfs_delalloc_reserve_metadata
>> s/btrfs_release_delalloc_extents/btrfs_delalloc_release_extents
>> 
>> Acked-by: Lu Fengqi 
>
>BTW,
>
>Documentation/process/5.Posting.rst
>
>216  - Acked-by: indicates an agreement by another developer (often a
>217maintainer of the relevant code) that the patch is appropriate for
>218inclusion into the kernel.
>
>and is commonly used by maintainers when someone else modifies the
>respective codebase by tree-wide, API or similar.
>
>If you agree with the way the patch is solving a particular problem and
>think it's important enough to be stated, you can write that in plain
>text.
>
>

Sorry for any inconvenience caused. I misunderstood about the meaning of
Acked-by.

I just want to express that it looks good to me.

-- 
Thanks,
Lu


--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v2 3/4] btrfs: Add sanity check for EXTENT_DATA when reading out leaf

2017-09-25 Thread Qu Wenruo



On 2017年09月26日 08:28, Qu Wenruo wrote:



On 2017年09月25日 23:45, David Sterba wrote:

On Wed, Aug 23, 2017 at 04:57:58PM +0900, Qu Wenruo wrote:

Add extra checker for item with EXTENT_DATA type.
This checks the following thing:
0) Key offset
    All key offset must be aligned to sectorsize.
    Inline extent must have 0 for key offset.

1) Item size
    Plain text inline file extent size must match item size.


'plain text' seems to be a bit misleading, I don't think we've ever
referred to uncompressed extent as such, although it makes some sense. I
think 'uncompressed' would work too.


I'll use 'uncompressed' instead.




    (compressed inline file extent has no info about its on-disk size)
    Regular/preallocated file extent size must be a fixed value.

2) Every member of regular file extent item
    Including alignment for bytenr and offset, possible value for
    compression/encryption/type.

3) Type/compression/encode must be one of the valid values.

This should be the most comprehensive and restrict check in the context
of btrfs_item for EXTENT_DATA.

Signed-off-by: Qu Wenruo 
---
  fs/btrfs/disk-io.c  | 108 


  include/uapi/linux/btrfs_tree.h |   1 +
  2 files changed, 109 insertions(+)

diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index e034d08bd036..b92296c6a698 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -549,6 +549,103 @@ static int check_tree_block_fsid(struct 
btrfs_fs_info *fs_info,

 btrfs_header_level(eb) == 0 ? "leaf" : "node",    \
 reason, btrfs_header_bytenr(eb), root->objectid, slot)
+static int check_extent_data_item(struct btrfs_root *root,
+  struct extent_buffer *leaf,
+  struct btrfs_key *key, int slot)
+{
+    struct btrfs_file_extent_item *fi;
+    u32 sectorsize = root->fs_info->sectorsize;
+    u32 item_size = btrfs_item_size_nr(leaf, slot);
+
+    if (!IS_ALIGNED(key->offset, sectorsize)) {
+    CORRUPT("unaligned key offset for file extent",


The CORRUPT macro does not print any details beyond what it gets from
the parameters, so here we'd like to know which extent it is and what's
the size. The sectorsize can be found elsewhere so it does not need
to be printed.


Did you mean, the corrupt can be enhanced just like btrfs_printk() to 
have custom format and args list?


Thanks,
Qu



Did you mean despite bytenr of the tree block and root objectid, we 
should output more info about the key?


By "size" did you mean the disk_num_bytes of the extent?

But since the offset is already invalid, I don't think we can trust any 
data from that structure, so what we can output is only about the key.


I can enhance the output to output the key, but I'm afraid I can't 
output anything more than that.





+    leaf, root, slot);
+    return -EUCLEAN;
+    }
+
+    fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
+
+    if (btrfs_file_extent_type(leaf, fi) >= 
BTRFS_FILE_EXTENT_LAST_TYPE) {

+    CORRUPT("invalid file extent type", leaf, root, slot);


"invalid file extent type %d"

and actually, we could add some item-specific helpers to report the
corruption so if it's for an extent, print the generic extent
information, plus an additional information what exactly was wrong.


For additional info I'm completely fine.

But I'm not sure if it's good to output more info about the item.

The biggest problem is, when any member of the item can't pass 
validation checker, we can't trust the whole item.

So the item-specific info may not contain meaningful info.




+    return -EUCLEAN;
+    }
+
+    /*
+ * Support for new compression/encrption must introduce incompat 
flag,

+ * and must be caught in open_ctree().
+ */
+    if (btrfs_file_extent_compression(leaf, fi) >= 
BTRFS_COMPRESS_LAST) {

+    CORRUPT("invalid file extent compression", leaf, root, slot);
+    return -EUCLEAN;
+    }
+    if (btrfs_file_extent_encryption(leaf, fi)) {
+    CORRUPT("invalid file extent encryption", leaf, root, slot);
+    return -EUCLEAN;
+    }
+    if (btrfs_file_extent_type(leaf, fi) == BTRFS_FILE_EXTENT_INLINE) {
+    /* Inline extent must have 0 as key offset */
+    if (key->offset) {
+    CORRUPT("inline extent has non-zero key offset",
+    leaf, root, slot);
+    return -EUCLEAN;
+    }
+
+    /* Compressed inline extent has no on-disk size, skip it */
+    if (btrfs_file_extent_compression(leaf, fi) !=
+    BTRFS_COMPRESS_NONE)
+    return 0;
+
+    /* Plaintext inline extent size must match item size */
+    if (item_size != BTRFS_FILE_EXTENT_INLINE_DATA_START +
+    btrfs_file_extent_ram_bytes(leaf, fi)) {
+    CORRUPT("plaintext inline extent has invalid size",
+    leaf, root, slot);
+    return -EUCLEAN;
+    }
+    return 0;
+    }
+
+
+    /* 

Re: [PATCH v2 3/4] btrfs: Add sanity check for EXTENT_DATA when reading out leaf

2017-09-25 Thread Qu Wenruo



On 2017年09月25日 23:45, David Sterba wrote:

On Wed, Aug 23, 2017 at 04:57:58PM +0900, Qu Wenruo wrote:

Add extra checker for item with EXTENT_DATA type.
This checks the following thing:
0) Key offset
All key offset must be aligned to sectorsize.
Inline extent must have 0 for key offset.

1) Item size
Plain text inline file extent size must match item size.


'plain text' seems to be a bit misleading, I don't think we've ever
referred to uncompressed extent as such, although it makes some sense. I
think 'uncompressed' would work too.


I'll use 'uncompressed' instead.




(compressed inline file extent has no info about its on-disk size)
Regular/preallocated file extent size must be a fixed value.

2) Every member of regular file extent item
Including alignment for bytenr and offset, possible value for
compression/encryption/type.

3) Type/compression/encode must be one of the valid values.

This should be the most comprehensive and restrict check in the context
of btrfs_item for EXTENT_DATA.

Signed-off-by: Qu Wenruo 
---
  fs/btrfs/disk-io.c  | 108 
  include/uapi/linux/btrfs_tree.h |   1 +
  2 files changed, 109 insertions(+)

diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index e034d08bd036..b92296c6a698 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -549,6 +549,103 @@ static int check_tree_block_fsid(struct btrfs_fs_info 
*fs_info,
   btrfs_header_level(eb) == 0 ? "leaf" : "node",   \
   reason, btrfs_header_bytenr(eb), root->objectid, slot)
  
+static int check_extent_data_item(struct btrfs_root *root,

+ struct extent_buffer *leaf,
+ struct btrfs_key *key, int slot)
+{
+   struct btrfs_file_extent_item *fi;
+   u32 sectorsize = root->fs_info->sectorsize;
+   u32 item_size = btrfs_item_size_nr(leaf, slot);
+
+   if (!IS_ALIGNED(key->offset, sectorsize)) {
+   CORRUPT("unaligned key offset for file extent",


The CORRUPT macro does not print any details beyond what it gets from
the parameters, so here we'd like to know which extent it is and what's
the size. The sectorsize can be found elsewhere so it does not need
to be printed.


Did you mean despite bytenr of the tree block and root objectid, we 
should output more info about the key?


By "size" did you mean the disk_num_bytes of the extent?

But since the offset is already invalid, I don't think we can trust any 
data from that structure, so what we can output is only about the key.


I can enhance the output to output the key, but I'm afraid I can't 
output anything more than that.





+   leaf, root, slot);
+   return -EUCLEAN;
+   }
+
+   fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
+
+   if (btrfs_file_extent_type(leaf, fi) >= BTRFS_FILE_EXTENT_LAST_TYPE) {
+   CORRUPT("invalid file extent type", leaf, root, slot);


"invalid file extent type %d"

and actually, we could add some item-specific helpers to report the
corruption so if it's for an extent, print the generic extent
information, plus an additional information what exactly was wrong.


For additional info I'm completely fine.

But I'm not sure if it's good to output more info about the item.

The biggest problem is, when any member of the item can't pass 
validation checker, we can't trust the whole item.

So the item-specific info may not contain meaningful info.




+   return -EUCLEAN;
+   }
+
+   /*
+* Support for new compression/encrption must introduce incompat flag,
+* and must be caught in open_ctree().
+*/
+   if (btrfs_file_extent_compression(leaf, fi) >= BTRFS_COMPRESS_LAST) {
+   CORRUPT("invalid file extent compression", leaf, root, slot);
+   return -EUCLEAN;
+   }
+   if (btrfs_file_extent_encryption(leaf, fi)) {
+   CORRUPT("invalid file extent encryption", leaf, root, slot);
+   return -EUCLEAN;
+   }
+   if (btrfs_file_extent_type(leaf, fi) == BTRFS_FILE_EXTENT_INLINE) {
+   /* Inline extent must have 0 as key offset */
+   if (key->offset) {
+   CORRUPT("inline extent has non-zero key offset",
+   leaf, root, slot);
+   return -EUCLEAN;
+   }
+
+   /* Compressed inline extent has no on-disk size, skip it */
+   if (btrfs_file_extent_compression(leaf, fi) !=
+   BTRFS_COMPRESS_NONE)
+   return 0;
+
+   /* Plaintext inline extent size must match item size */
+   if (item_size != BTRFS_FILE_EXTENT_INLINE_DATA_START +
+   btrfs_file_extent_ram_bytes(leaf, fi)) {
+   CORRUPT("plaintext inline extent has invalid size",
+   

[PATCH] New ioctl BTRFS_IOC_GET_CHUNK_INFO

2017-09-25 Thread Goffredo Baroncelli
From: Goffredo Baroncelli 

Add a new ioctl to get info about chunk without requiring the root privileges.
This allow to a non root user to know how the space of the filesystem is
allocated.

Signed-off-by: Goffredo Baroncelli 
---
 fs/btrfs/ioctl.c   | 215 +
 include/uapi/linux/btrfs.h |  38 
 2 files changed, 253 insertions(+)

diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index fa1b78cf25f6..d9ba9ed52693 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -2220,6 +2220,219 @@ static noinline int btrfs_ioctl_tree_search_v2(struct 
file *file,
return ret;
 }
 
+/*
+ * Return:
+ * 0   -> copied all data, possible further data
+ * 1   -> copied all data, no further data
+ * -EAGAIN -> not enough space, restart it
+ * -EFAULT -> the user passed an invalid address/size pair
+ */
+static noinline int copy_chunk_info(struct btrfs_path *path,
+  char __user *ubuf,
+  size_t buf_size,
+  u64 *used_buf,
+  int *num_found,
+  u64 *offset)
+{
+   struct extent_buffer *leaf;
+   unsigned long item_off;
+   unsigned long item_len;
+   int nritems;
+   int i;
+   int slot;
+   int ret = 0;
+   struct btrfs_key key;
+
+   leaf = path->nodes[0];
+   slot = path->slots[0];
+   nritems = btrfs_header_nritems(leaf);
+
+   for (i = slot; i < nritems; i++) {
+   u64 destsize;
+   struct btrfs_chunk_info ci;
+   struct btrfs_chunk chunk;
+   int j, chunk_size;
+
+   item_off = btrfs_item_ptr_offset(leaf, i);
+   item_len = btrfs_item_size_nr(leaf, i);
+
+   btrfs_item_key_to_cpu(leaf, , i);
+   /*
+* we are not interested in other items type
+*/
+   if (key.type != BTRFS_CHUNK_ITEM_KEY) {
+   ret = 1;
+   goto out;
+   }
+
+   /*
+* In any case, the next search must start from here
+*/
+   *offset = key.offset;
+   read_extent_buffer(leaf, , item_off, sizeof(chunk));
+
+   /*
+* chunk.num_stripes-1 is correct, because btrfs_chunk includes
+* already a stripe
+*/
+   destsize = sizeof(struct btrfs_chunk_info) +
+   (chunk.num_stripes - 1) * sizeof(struct btrfs_stripe);
+
+   BUG_ON(destsize > item_len);
+
+   if (buf_size < destsize + *used_buf) {
+   if (*num_found)
+   /* try onother time */
+   ret = -EAGAIN;
+   else
+   /* in any case the buffer is too small */
+   ret = -EOVERFLOW;
+   goto out;
+   }
+
+   /* copy chunk */
+   chunk_size = offsetof(struct btrfs_chunk_info, stripes);
+   memset(, 0, chunk_size);
+   ci.length = btrfs_stack_chunk_length();
+   ci.stripe_len = btrfs_stack_chunk_stripe_len();
+   ci.type = btrfs_stack_chunk_type();
+   ci.num_stripes = btrfs_stack_chunk_num_stripes();
+   ci.sub_stripes = btrfs_stack_chunk_sub_stripes();
+   ci.offset = key.offset;
+
+   if (copy_to_user(ubuf + *used_buf, , chunk_size)) {
+   ret = -EFAULT;
+   goto out;
+   }
+   *used_buf += chunk_size;
+
+   /* copy stripes */
+   for (j = 0 ; j < chunk.num_stripes ; j++) {
+   struct btrfs_stripe chunk_stripe;
+   struct btrfs_chunk_info_stripe csi;
+
+   /*
+* j-1 is correct, because btrfs_chunk includes already
+* a stripe
+*/
+   read_extent_buffer(leaf, _stripe,
+   item_off + sizeof(struct btrfs_chunk) +
+   sizeof(struct btrfs_stripe) *
+   (j - 1), sizeof(chunk_stripe));
+
+   memset(, 0, sizeof(csi));
+
+   csi.devid = btrfs_stack_stripe_devid(_stripe);
+   csi.offset = btrfs_stack_stripe_offset(_stripe);
+   memcpy(csi.dev_uuid, chunk_stripe.dev_uuid,
+   sizeof(chunk_stripe.dev_uuid));
+   if (copy_to_user(ubuf + *used_buf, , sizeof(csi))) {
+   ret = -EFAULT;
+ 

[RFC] btrfs: new ioctl BTRFS_IOC_GET_CHUNK_INFO

2017-09-25 Thread Goffredo Baroncelli
The aim of this patch is to replace the BTRFS_IOC_TREE_SEARCH ioctl when
it is used to obtain information about the chunks/block groups. 

The problems in using the BTRFS_IOC_TREE_SEARCH is that it access
the very low data structure of BTRFS. This means: 
1) this would be complicated a possible change of the disk format
2) it requires the root privileges

The new BTRFS_IOC_GET_CHUNK_INFO is an attempt to address these issue.
This ioctl can be called from a not root user: I think that the data exposed 
are not sensibile data.

In a separate patches set, there is an update to the btrfs utility to
allow it to use this new ioctl int the load_chunk_info() function.
So now it is possible to do "btrfs fi us" without root privileges.

This is an RFC, because I want to be sure that
1) it is right my understanding that the chunk info are not a "sensibile data"
2) the api is a good api. In particular I am not sure that returning -EAGAIN is
the right thing to do when further data is available but the buffer is not
enough.

Comments are welcome
BR
G.Baroncelli


-- 
gpg @keyserver.linux.it: Goffredo Baroncelli 
Key fingerprint BBF5 1610 0B64 DAC6 5F7D  17B2 0EDA 9B37 8B82 E0B5

--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 2/2] Use the new ioctl BTRFS_IOC_GET_CHUNK_INFO for load_chunk_info()

2017-09-25 Thread Goffredo Baroncelli
From: Goffredo Baroncelli 

Use the new ioctl BTRFS_IOC_GET_CHUNK_INFO in load_chunk_info() instead of the
BTRFS_IOC_TREE_SEARCH. The old method is still present as fallback for
compatibility reason.

The goal is to avoid BTRFS_IOC_GET_CHUNK_INFO because it requires root
privileges.

Signed-off-by: Goffredo Baroncelli 
---
 cmds-fi-usage.c | 108 ++--
 ioctl.h |  60 +++
 2 files changed, 165 insertions(+), 3 deletions(-)

diff --git a/cmds-fi-usage.c b/cmds-fi-usage.c
index 6c846c15..6a101e28 100644
--- a/cmds-fi-usage.c
+++ b/cmds-fi-usage.c
@@ -36,7 +36,7 @@
 /*
  * Add the chunk info to the chunk_info list
  */
-static int add_info_to_list(struct chunk_info **info_ptr,
+static int legacy_add_info_to_list(struct chunk_info **info_ptr,
int *info_count,
struct btrfs_chunk *chunk)
 {
@@ -127,7 +127,8 @@ static int cmp_chunk_info(const void *a, const void *b)
((struct chunk_info *)b)->type);
 }
 
-static int load_chunk_info(int fd, struct chunk_info **info_ptr, int 
*info_count)
+static int legacy_load_chunk_info(int fd, struct chunk_info **info_ptr,
+   int *info_count)
 {
int ret;
struct btrfs_ioctl_search_args args;
@@ -180,7 +181,7 @@ static int load_chunk_info(int fd, struct chunk_info 
**info_ptr, int *info_count
off += sizeof(*sh);
item = (struct btrfs_chunk *)(args.buf + off);
 
-   ret = add_info_to_list(info_ptr, info_count, item);
+   ret = legacy_add_info_to_list(info_ptr, info_count, 
item);
if (ret) {
*info_ptr = NULL;
return 1;
@@ -213,6 +214,107 @@ static int load_chunk_info(int fd, struct chunk_info 
**info_ptr, int *info_count
return 0;
 }
 
+/*
+ * Add the chunk info to the chunk_info list
+ */
+static int add_info_to_list(struct chunk_info **info_ptr,
+   int *info_count,
+   struct btrfs_chunk_info *chunk)
+{
+
+   u64 type = chunk->type;
+   u64 size = chunk->length;
+   int num_stripes = chunk->num_stripes;
+   int j;
+
+   for (j = 0 ; j < num_stripes ; j++) {
+   int i;
+   struct chunk_info *p = NULL;
+   u64devid;
+
+   devid = chunk->stripes[j].devid;
+
+   for (i = 0 ; i < *info_count ; i++)
+   if ((*info_ptr)[i].type == type &&
+   (*info_ptr)[i].devid == devid &&
+   (*info_ptr)[i].num_stripes == num_stripes) {
+   p = (*info_ptr) + i;
+   break;
+   }
+
+   if (!p) {
+   int tmp = sizeof(struct btrfs_chunk) *
+   (*info_count + 1);
+   struct chunk_info *res = realloc(*info_ptr, tmp);
+
+   if (!res) {
+   free(*info_ptr);
+   error("not enough memory");
+   return -ENOMEM;
+   }
+
+   *info_ptr = res;
+   p = res + *info_count;
+   (*info_count)++;
+
+   p->devid = devid;
+   p->type = type;
+   p->size = 0;
+   p->num_stripes = num_stripes;
+   }
+
+   p->size += size;
+
+   }
+
+   return 0;
+
+}
+
+static int load_chunk_info(int fd, struct chunk_info **info_ptr,
+   int *info_count)
+{
+
+   char buf[4096];
+   struct btrfs_ioctl_chunk_info *bici =
+   (struct btrfs_ioctl_chunk_info *)buf;
+   int cont;
+
+   bici->buf_size = sizeof(buf);
+   bici->offset = (u64)0;
+
+   do {
+   int i;
+   struct btrfs_chunk_info *ci;
+   int ret;
+
+   cont = false;
+   ret = ioctl(fd, BTRFS_IOC_GET_CHUNK_INFO, bici);
+   if (ret < 0) {
+   int e = errno;
+
+   if (e == ENOTTY)
+   return legacy_load_chunk_info(fd, info_ptr,
+   info_count);
+   else if (e == EAGAIN)
+   cont = true;
+   else
+   return -e;
+   }
+
+   ci = btrfs_first_chunk_info(bici);
+   for (i = 0 ; i < bici->items_count ; i++) {
+   add_info_to_list(info_ptr, info_count, ci);
+   ci = btrfs_next_chunk_info(ci);
+ 

[RFC] btrfs-progs: use the new ioctl BTRFS_IOC_GET_CHUNK_INFO

2017-09-25 Thread Goffredo Baroncelli

Hi all,

this patches set uses the new ioctl BTRFS_IOC_GET_CHUNK_INFO to get
information about the chunk (see my other emails). 

The first patch change the function get_partition_size() to avoid
ioctl which would require root privileges (BLKGETSIZE64).
Instead it obtain the information via the sysfs filesystem.

The second patch use the BTRFS_IOC_GET_CHUNK_INFO instead of
TREE_SEARCH_IOCTL. The old code is still in place as fallback
when the kernel doesn't have the new ioctl.

These patches allow to use "btrfs fi usage" without root privileges.

Comments are welcome
BR
G.Baroncelli

--
gpg @keyserver.linux.it: Goffredo Baroncelli 
Key fingerprint BBF5 1610 0B64 DAC6 5F7D  17B2 0EDA 9B37 8B82 E0B5
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 1/2] get_partition_size() doens't require root privileges

2017-09-25 Thread Goffredo Baroncelli
From: Goffredo Baroncelli 

Allow the get_partition_size() to be called without
root privileges.

Signed-off-by: Goffredo baroncelli 
---
 utils.c | 27 +++
 1 file changed, 19 insertions(+), 8 deletions(-)

diff --git a/utils.c b/utils.c
index 7a2710fe..ee3ed544 100644
--- a/utils.c
+++ b/utils.c
@@ -2080,18 +2080,29 @@ u64 disk_size(const char *path)
 
 u64 get_partition_size(const char *dev)
 {
-   u64 result;
-   int fd = open(dev, O_RDONLY);
+   struct stat statbuf;
+   int r;
+   int fd;
+   char buf[100];
 
-   if (fd < 0)
-   return 0;
-   if (ioctl(fd, BLKGETSIZE64, ) < 0) {
-   close(fd);
+   r = stat(dev, );
+   if (r != 0)
return 0;
-   }
+
+   snprintf(buf, sizeof(buf),
+   "/sys/dev/block/%d:%d/size", major(statbuf.st_rdev),
+minor(statbuf.st_rdev));
+
+   fd = open(buf, O_RDONLY);
+   BUG_ON(fd < 0);
+
+   r = read(fd, buf, sizeof(buf)-1);
close(fd);
 
-   return result;
+   BUG_ON(r < 0);
+   buf[r] = 0;
+
+   return atoll(buf) * 512;
 }
 
 /*
-- 
2.14.1

--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: Btrfs Issues

2017-09-25 Thread Ruoxin Jiang
Hi Satoru,

I'm so sorry for the late reply! I rewrote the test programs to invoke
syscalls in the conventional way and added a few comments. Please find
attached the revised report.

As to your questions on "mmap", you are totally right that the first
mmap in the original 1/min.cpp is irrelevant. The original program was
automatically generated from a fuzzer I used and I hope the revised
ones to be much more understandable.

Thanks,
Amy

On Sun, Sep 17, 2017 at 11:10 PM, Satoru Takeuchi
 wrote:
> At Wed, 13 Sep 2017 11:53:35 -0400,
> Ruoxin Jiang wrote:
>>
>> [1  ]
>> Hello,
>>
>> We are researchers from Columbia University, New York. As part of our
>> current research we have found some semantic discrepancies between
>> btrfs and other popular filesystems.
>>
>> We have attached two cases. The first one involves an invalid O_DIRECT
>> write() that fails back to buffered write instead of failing with
>> error EINVAL. In directory 2, we discovered that btrfs calculates
>> write_bytes in __btrfs_buffered_write differently from that in
>> generic_perform_writes in fs/mmap.c. This can cause inconsistent
>> behavior between btrfs and other filesystems when program invokes the
>> same writev/write() syscall.
>>
>> In each directory, you will find a Readme.md describing the issue and
>> pointing to the code that may cause the problem. For your convenience,
>> we also included test programs (min.cpp) and instructions in Readme to
>> help reproduce the issues.
>>
>> We would appreciate very much if you could confirm the two cases at
>> your conveniences.
>
> I took a look at your test programs, btrfs_issues/{1,2}/min.cpp. It looks
> very hard to read since you call syscalls in odd ways and all flags are
> hardcoded as literal hexadecimal numbers. Could rewrite these program
> to improve readability?
>
> In addition, I have two questions about btrfs_issues/1/min.cpp.
>
> 1. Why you set 'filename' as the 1st argument of mmap()?
> 2. What's the purpose of mmap() call? I guess mmap() is not related to issue 
> 1.
>
> Thanks,
> Satoru
>
>>
>> Thanks,
>> Amy
>> [2 btrfs_issues.tar.gz ]
> --
> To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
> the body of a message to majord...@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html



-- 
Ruoxin(Amy) Jiang
Columbia University, M.S. Computer Science
347-277-5455


btrfs_issues_revised.tar.gz
Description: GNU Zip compressed data


Re: [PATCH][v3] Btrfs: add a extent ref verify tool

2017-09-25 Thread David Sterba
On Fri, Sep 01, 2017 at 03:09:30AM -0400, jo...@toxicpanda.com wrote:
> From: Josef Bacik 
> 
> We were having corruption issues that were tied back to problems with the 
> extent
> tree.  In order to track them down I built this tool to try and find the
> culprit, which was pretty successful.  If you compile with this tool on it 
> will
> live verify every ref update that the fs makes and make sure it is consistent
> and valid.  I've run this through with xfstests and haven't gotten any false
> positives.  Thanks,
> 
> Signed-off-by: Josef Bacik 
> ---
> v2->v3:
> - fix use after free in an error case

Can you please split the patch into more parts? This is just too big.

* the parameter changes, one per patch
* add ref-veriy.*
* new mount option and enabling the ref-verify

Apart from the specific comments written inline, here's list of thing
that I saw repeatd in several places:

printk instead of the btrfs_* error helpers - the bare printk will not
tell youw which filesystem is affected so it's not helpful when there
are several btrfs filesytems active

please don't split long strings

please don't use %Lu or %Ld format string, %llu

GFP_NOFS -- it's used on the open_ctree path so GFP_KERNEL is the right
and safe flag

misc small coding style issues


I'm half way through reviewing it from the functional side, so far it
looks good.

>  fs/btrfs/Kconfig   |   10 +
>  fs/btrfs/Makefile  |1 +
>  fs/btrfs/ctree.c   |2 +-
>  fs/btrfs/ctree.h   |   14 +-
>  fs/btrfs/disk-io.c |   15 +
>  fs/btrfs/extent-tree.c |   44 ++-
>  fs/btrfs/file.c|   10 +-
>  fs/btrfs/inode.c   |9 +-
>  fs/btrfs/ioctl.c   |2 +-
>  fs/btrfs/ref-verify.c  | 1019 
> 
>  fs/btrfs/ref-verify.h  |   51 +++
>  fs/btrfs/relocation.c  |   14 +-
>  fs/btrfs/super.c   |   17 +
>  fs/btrfs/tree-log.c|2 +-
>  14 files changed, 1178 insertions(+), 32 deletions(-)
>  create mode 100644 fs/btrfs/ref-verify.c
>  create mode 100644 fs/btrfs/ref-verify.h
> 
> diff --git a/fs/btrfs/Kconfig b/fs/btrfs/Kconfig
> index 80e9c18..77d7f74 100644
> --- a/fs/btrfs/Kconfig
> +++ b/fs/btrfs/Kconfig
> @@ -89,3 +89,13 @@ config BTRFS_ASSERT
> any of the assertions trip.  This is meant for btrfs developers only.
>  
> If unsure, say N.
> +
> +config BTRFS_FS_REF_VERIFY
> + bool "Btrfs with the ref verify tool compiled in"
> + depends on BTRFS_FS

must be N by default

> + help
> +   Enable run-time extent reference verification instrumentation.  This
> +   is meant to be used by btrfs developers for tracking down extent
> +   reference problems or verifying they didn't break something.
> +
> +   If unsure, say N.
> diff --git a/fs/btrfs/Makefile b/fs/btrfs/Makefile
> index 128ce17..3172751 100644
> --- a/fs/btrfs/Makefile
> +++ b/fs/btrfs/Makefile
> @@ -13,6 +13,7 @@ btrfs-y += super.o ctree.o extent-tree.o print-tree.o 
> root-tree.o dir-item.o \
>  
>  btrfs-$(CONFIG_BTRFS_FS_POSIX_ACL) += acl.o
>  btrfs-$(CONFIG_BTRFS_FS_CHECK_INTEGRITY) += check-integrity.o
> +btrfs-$(CONFIG_BTRFS_FS_REF_VERIFY) += ref-verify.o
>  
>  btrfs-$(CONFIG_BTRFS_FS_RUN_SANITY_TESTS) += tests/free-space-tests.o \
>   tests/extent-buffer-tests.o tests/btrfs-tests.o \
> diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
> index 6d49db7..a4812ce 100644
> --- a/fs/btrfs/ctree.c
> +++ b/fs/btrfs/ctree.c
> @@ -192,7 +192,7 @@ struct extent_buffer *btrfs_lock_root_node(struct 
> btrfs_root *root)
>   * tree until you end up with a lock on the root.  A locked buffer
>   * is returned, with a reference held.
>   */
> -static struct extent_buffer *btrfs_read_lock_root_node(struct btrfs_root 
> *root)
> +struct extent_buffer *btrfs_read_lock_root_node(struct btrfs_root *root)
>  {
>   struct extent_buffer *eb;
>  
> diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
> index d49b045..4fa3ddd 100644
> --- a/fs/btrfs/ctree.h
> +++ b/fs/btrfs/ctree.h
> @@ -1098,6 +1098,12 @@ struct btrfs_fs_info {
>   u32 nodesize;
>   u32 sectorsize;
>   u32 stripesize;
> +
> +#ifdef CONFIG_BTRFS_FS_REF_VERIFY
> + spinlock_t ref_verify_lock;
> + struct rb_root block_tree;
> + bool ref_verify_enabled;

the on/off bit can be added to the fs_info::flags instead

> +#endif
>  };
>  
>  static inline struct btrfs_fs_info *btrfs_sb(struct super_block *sb)
> @@ -1336,6 +1342,7 @@ static inline u32 BTRFS_MAX_XATTR_SIZE(const struct 
> btrfs_fs_info *info)
>  #define BTRFS_MOUNT_FRAGMENT_METADATA(1 << 25)
>  #define BTRFS_MOUNT_FREE_SPACE_TREE  (1 << 26)
>  #define BTRFS_MOUNT_NOLOGREPLAY  (1 << 27)
> +#define BTRFS_MOUNT_REF_VERIFY   (1 << 28)
>  
>  #define BTRFS_DEFAULT_COMMIT_INTERVAL(30)
>  #define BTRFS_DEFAULT_MAX_INLINE (2048)
> @@ -2627,7 +2634,7 @@ void btrfs_free_tree_block(struct btrfs_trans_handle 
> *trans,
>  struct extent_buffer 

Wrong device?

2017-09-25 Thread linux-btrfs


Hi,

I have 15 x 6 TB disks in md-raid (Because btrfs's raid6 was marked as 
not-for-real-use when I first time installed this machine)


Now I have got both problem twice.

At this last time I have 233 subvolums, and millions of files (all 
together)


Then filesystem went to read only with this dmesg:

[Sat Sep 23 07:25:28 2017] [ cut here ]
[Sat Sep 23 07:25:28 2017] WARNING: CPU: 5 PID: 5431 at 
/build/linux-hwe-edge-CrMNv8/linux-hwe-edge-4.10.0/fs/btrfs/extent-tree.c:6947 
__btrfs_free_extent.isra.61+0x2cb/0xeb0 [btrfs]
[Sat Sep 23 07:25:28 2017] BTRFS: Transaction aborted (error -28)
[Sat Sep 23 07:25:28 2017] Modules linked in: 8021q garp mrp stp llc bonding 
ip6table_filter ip6_tables ipt_REJECT nf_reject_ipv4 nf_log_ipv4 nf_log_common 
xt_LOG xt_limit xt_tcpudp xt_multiport iptable_filter ip_tables x_tables 
intel_rapl sb_edac edac_core x86_pkg_temp_thermal intel_powerclamp coretemp 
ipmi_ssif kvm_intel kvm irqbypass intel_cstate intel_rapl_perf joydev 
input_leds mei_me mei lpc_ich shpchp ioatdma ipmi_si ipmi_devintf 
ipmi_msghandler acpi_pad acpi_power_meter mac_hid ib_iser rdma_cm iw_cm ib_cm 
ib_core configfs iscsi_tcp libiscsi_tcp libiscsi scsi_transport_iscsi autofs4 
btrfs raid10 raid0 multipath linear raid456 async_raid6_recov async_memcpy 
async_pq async_xor async_tx xor raid6_pq libcrc32c raid1 ses enclosure ixgbe 
ast crct10dif_pclmul i2c_algo_bit crc32_pclmul ttm drm_kms_helper 
ghash_clmulni_intel
[Sat Sep 23 07:25:28 2017]  dca syscopyarea pcbc sysfillrect aesni_intel 
aes_x86_64 hid_generic sysimgblt crypto_simd mpt3sas fb_sys_fops ptp r8169 
raid_class ahci glue_helper usbhid pps_core mxm_wmi cryptd drm hid libahci mdio 
scsi_transport_sas mii fjes wmi
[Sat Sep 23 07:25:28 2017] CPU: 5 PID: 5431 Comm: btrfs-transacti Tainted: G
W   4.10.0-26-generic #30~16.04.1-Ubuntu
[Sat Sep 23 07:25:28 2017] Hardware name: Supermicro X10DRi/X10DRI-T, BIOS 2.1 
09/13/2016
[Sat Sep 23 07:25:28 2017] Call Trace:
[Sat Sep 23 07:25:28 2017]  dump_stack+0x63/0x90
[Sat Sep 23 07:25:28 2017]  __warn+0xcb/0xf0
[Sat Sep 23 07:25:28 2017]  warn_slowpath_fmt+0x5f/0x80
[Sat Sep 23 07:25:28 2017]  __btrfs_free_extent.isra.61+0x2cb/0xeb0 [btrfs]
[Sat Sep 23 07:25:28 2017]  ? btrfs_merge_delayed_refs+0x64/0x640 [btrfs]
[Sat Sep 23 07:25:28 2017]  ? btrfs_search_slot+0x981/0x9c0 [btrfs]
[Sat Sep 23 07:25:28 2017]  __btrfs_run_delayed_refs+0xb44/0x13a0 [btrfs]
[Sat Sep 23 07:25:28 2017]  ? btree_set_page_dirty+0xe/0x10 [btrfs]
[Sat Sep 23 07:25:28 2017]  ? free_extent_buffer+0x4b/0xa0 [btrfs]
[Sat Sep 23 07:25:28 2017]  ? igrab+0x1e/0x60
[Sat Sep 23 07:25:28 2017]  btrfs_run_delayed_refs+0x7f/0x2a0 [btrfs]
[Sat Sep 23 07:25:28 2017]  btrfs_write_dirty_block_groups+0x169/0x3c0 [btrfs]
[Sat Sep 23 07:25:28 2017]  commit_cowonly_roots+0x221/0x2c0 [btrfs]
[Sat Sep 23 07:25:28 2017]  btrfs_commit_transaction+0x542/0x950 [btrfs]
[Sat Sep 23 07:25:28 2017]  transaction_kthread+0x1a6/0x1c0 [btrfs]
[Sat Sep 23 07:25:28 2017]  kthread+0x109/0x140
[Sat Sep 23 07:25:28 2017]  ? btrfs_cleanup_transaction+0x540/0x540 [btrfs]
[Sat Sep 23 07:25:28 2017]  ? kthread_create_on_node+0x60/0x60
[Sat Sep 23 07:25:28 2017]  ret_from_fork+0x2c/0x40
[Sat Sep 23 07:25:28 2017] ---[ end trace 76fd3c22e75c2b85 ]---
[Sat Sep 23 07:25:28 2017] BTRFS: error (device sdb) in 
__btrfs_free_extent:6947: errno=-28 No space left
[Sat Sep 23 07:25:28 2017] BTRFS: error (device sdb) in 
btrfs_drop_snapshot:9193: errno=-28 No space left
[Sat Sep 23 07:25:28 2017] BTRFS info (device sdb): forced readonly


After a long googling (about more complex situations) I suddenly noticed 
"device sdb" WTF???  Filesystem is mounted from /dev/md3 (sdb is part of 
that mdraid) so btrfs should not even know anything about that /dev/sdb.


So maybe the error is more simple that I thought.  It tries to allocate 
more metadata from physical drive (not from /dev/md3 as it was supposed to 
do), so that "no space left on device" could be the REAL error...


But why it is doing so?  Does it help if I add some extra virtualization 
layer, like LVM?



# grep data /etc/fstab
/dev/md3 /data btrfs   compress=zlib,space_cache,noatime 0 0

# btrfs fi df /data
Data, single: total=4.77TiB, used=4.77TiB
System, DUP: total=32.00MiB, used=592.00KiB
Metadata, DUP: total=92.00GiB, used=90.87GiB
GlobalReserve, single: total=512.00MiB, used=172.50MiB

# btrfs fi show /data
Label: none  uuid: 84a31ba6-d22d-4653-b071-5525dbdfe561
Total devices 1 FS bytes used 4.85TiB
devid2 size 70.95TiB used 4.95TiB path /dev/md3

# cat /proc/mdstat
Personalities : [raid1] [raid6] [raid5] [raid4] [linear] [multipath] [raid0] 
[raid10]
md3 : active raid6 sdl[17](S) sdb[16] sdq[15] sdp[14] sdm[11] sdc[1] sdo[13] 
sde[3] sdj[8] sdn[12] sdh[6] sdi[7] sdk[9] sdf[4] sdd[2] sdg[5]
  76185088512 blocks super 1.2 level 6, 512k chunk, algorithm 2 [15/15] 
[UUU]
  bitmap: 0/44 pages [0KB], 65536KB chunk

# btrfs --version
btrfs-progs v4.4


Btrfs progs release 4.13.1

2017-09-25 Thread David Sterba
Hi,

btrfs-progs version 4.13.1 have been released.

The ZSTD support has been added, the kernel code will land in 4.14. Now
configure will detect if libzstd is available and will use it. In the future
this will become a mandatory dependency. Please check your distro for libzstd
and ask them to add it eventually. As a fallback, it is and will be possible to
disable zstd detection (and btrfs restore will obviously lack the support).

Changes:
  * image: speed up generating the sanitized names, do not generate unprintable 
chars
  * completion: add missing commands, better mount point detection
  * restore: add zstd support; libzstd detected automatically, will be
requested by default in the future, or can be configured out
  * other:
* misc fixes found by sparse
* doc enhancements, ioctl manual page started
* updated and new tests
* build fixes

Tarballs: https://www.kernel.org/pub/linux/kernel/people/kdave/btrfs-progs/
Git: git://git.kernel.org/pub/scm/linux/kernel/git/kdave/btrfs-progs.git

Shortlog:

David Sterba (18):
  btrfs-progs: README: update links, enhance contribution section
  btrfs-progs: tests: fix run_mustfail in cli-tests/007-check-force
  btrfs-progs: build: sync Makefile to Android.mk
  btrfs-progs: convert: use correct string printing for errcode_t
  btrfs-progs: tests: remove temporary loopdev files
  btrfs-progs: tests: remove temporary images in mkfs/005 and mkfs/006
  btrfs-progs: build: whitespace adjustment of [LD] fssum
  btrfs-progs: tests: make sure _is_file_or_command does not get confused
  btrfs-progs: docs: start ioctl documentation manual page
  btrfs-progs: fix debugging macro checks
  btrfs-progs: print-tree: use proper helper for reading offset
  btrfs-progs: free-space-cache: fix endianity when reading from disk_key
  btrfs-progs: updated and add missing function attributes to the definition
  btrfs-progs: build: add missing defines for the C=1 build
  btrfs-progs: build: use -std=gnu89 for sparse
  btrfs-progs: build: add more sparse warning checks
  btrfs-progs: tests: check there are no unprintable characters in 
btrfs-image -ss output
  btrfs-progs: update CHANGES for v4.13.1

Misono, Tomohiro (2):
  btrfs-progs: update btrfs-completion
  btrfs-progs: cleanup whitespaces of btrfs-completion

Naohiro Aota (2):
  btrfs-progs: build: generate all dependency files
  btrfs-progs: build: omit unnecessary -MD flag

Nicholas D Steeves (2):
  btrfs-progs: tests: Add required IETF Trust copyright to SHA 
implementation
  btrfs-progs: tests: Remove misleading BCP 78 boilerplate from SHA 
implementation

Nick Terrell (2):
  btrfs-progs: Add zstd support
  btrfs-progs: tests: add testing image for zstd for btrfs-restore

Piotr Pawlow (6):
  btrfs-progs: image: fix non-printable characters in generated file names
  btrfs-progs: image: move core find_collision code to a separate function
  btrfs-progs: image: add reverse CRC32C table
  btrfs-progs: image: add a function to calculate CRC32C collisions
  btrfs-progs: image: add a function to check if generated filename suffix 
is valid
  btrfs-progs: image: use CRC32C reversing instead of brute force to find 
collisions

Qu Wenruo (2):
  btrfs-progs: Refactor find_next_chunk to get rid of parameter root and 
objectid
  btrfs-progs: Fix one-byte overlap bug in free_block_group_cache

Uli Heller (1):
  btrfs-progs: Removed missing header file, fixes compilation error

yingyil (1):
  btrfs-progs: mkfs: refactor create_data_reloc_tree

--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [RFC 0/3]: settable compression level for zstd

2017-09-25 Thread David Sterba
On Fri, Sep 15, 2017 at 05:34:57PM +0200, Adam Borowski wrote:
> Hi!
> Here's a patch set that allows changing the compression level for zstd,
> currently at mount time only.  I've played with it for a month, so despite
> being a quick hack, it's reasonably well tested.  Tested on 4.13 +
> btrfs-for-4.14 only, though -- I've booted 11th-day-of-merge-window only an
> hour ago on one machine, no explosions yet.
> 
> As a quick hack, it doesn't conserve memory as it should: all workspace
> allocations assume level 15 and waste space otherwise.
> 
> Because of an (easily changeable) quirk of compression level encoding, the
> max is set at 15, but I guess higher levels are pointless for 128KB blocks. 
> Nick and co can tell us more -- for me zstd is mostly a black box so it's
> you who knows anything about tuning it.
> 
> There are three patches:
> * [David Sterba] btrfs: allow to set compression level for zlib
>   Unmodified version of the patch from Jul 24, I'm re-sending it for
>   convenience.
> * btrfs: allow setting zlib compression level via :9
>   Some bikeshedding: it looks like Chris Mason also favours zlib:9 over
>   zlib9 as the former is more readable.  If you disagree... well, it's up
>   to you to decide anyway.  This patch accepts both syntaxes.
> * btrfs: allow setting zstd level

I'll add the patches to for-next as it does not affect default behaviour
and the changes are contained in compression. Further updates are fine,
though I think we'll have to resolve the workspace waste before the
patches can be merged to master.

I tend agree to add the separator and slightly prefer ':' over '-'. In
order to utilize the zstd at higher levels, we can enhance the
compressed chunk size and then we could use another ':' to separate that
from the rest.
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v2 3/4] btrfs: Add sanity check for EXTENT_DATA when reading out leaf

2017-09-25 Thread David Sterba
On Wed, Aug 23, 2017 at 04:57:58PM +0900, Qu Wenruo wrote:
> Add extra checker for item with EXTENT_DATA type.
> This checks the following thing:
> 0) Key offset
>All key offset must be aligned to sectorsize.
>Inline extent must have 0 for key offset.
> 
> 1) Item size
>Plain text inline file extent size must match item size.

'plain text' seems to be a bit misleading, I don't think we've ever
referred to uncompressed extent as such, although it makes some sense. I
think 'uncompressed' would work too.

>(compressed inline file extent has no info about its on-disk size)
>Regular/preallocated file extent size must be a fixed value.
> 
> 2) Every member of regular file extent item
>Including alignment for bytenr and offset, possible value for
>compression/encryption/type.
> 
> 3) Type/compression/encode must be one of the valid values.
> 
> This should be the most comprehensive and restrict check in the context
> of btrfs_item for EXTENT_DATA.
> 
> Signed-off-by: Qu Wenruo 
> ---
>  fs/btrfs/disk-io.c  | 108 
> 
>  include/uapi/linux/btrfs_tree.h |   1 +
>  2 files changed, 109 insertions(+)
> 
> diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
> index e034d08bd036..b92296c6a698 100644
> --- a/fs/btrfs/disk-io.c
> +++ b/fs/btrfs/disk-io.c
> @@ -549,6 +549,103 @@ static int check_tree_block_fsid(struct btrfs_fs_info 
> *fs_info,
>  btrfs_header_level(eb) == 0 ? "leaf" : "node",   \
>  reason, btrfs_header_bytenr(eb), root->objectid, slot)
>  
> +static int check_extent_data_item(struct btrfs_root *root,
> +   struct extent_buffer *leaf,
> +   struct btrfs_key *key, int slot)
> +{
> + struct btrfs_file_extent_item *fi;
> + u32 sectorsize = root->fs_info->sectorsize;
> + u32 item_size = btrfs_item_size_nr(leaf, slot);
> +
> + if (!IS_ALIGNED(key->offset, sectorsize)) {
> + CORRUPT("unaligned key offset for file extent",

The CORRUPT macro does not print any details beyond what it gets from
the parameters, so here we'd like to know which extent it is and what's
the size. The sectorsize can be found elsewhere so it does not need
to be printed.

> + leaf, root, slot);
> + return -EUCLEAN;
> + }
> +
> + fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
> +
> + if (btrfs_file_extent_type(leaf, fi) >= BTRFS_FILE_EXTENT_LAST_TYPE) {
> + CORRUPT("invalid file extent type", leaf, root, slot);

"invalid file extent type %d"

and actually, we could add some item-specific helpers to report the
corruption so if it's for an extent, print the generic extent
information, plus an additional information what exactly was wrong.

> + return -EUCLEAN;
> + }
> +
> + /*
> +  * Support for new compression/encrption must introduce incompat flag,
> +  * and must be caught in open_ctree().
> +  */
> + if (btrfs_file_extent_compression(leaf, fi) >= BTRFS_COMPRESS_LAST) {
> + CORRUPT("invalid file extent compression", leaf, root, slot);
> + return -EUCLEAN;
> + }
> + if (btrfs_file_extent_encryption(leaf, fi)) {
> + CORRUPT("invalid file extent encryption", leaf, root, slot);
> + return -EUCLEAN;
> + }
> + if (btrfs_file_extent_type(leaf, fi) == BTRFS_FILE_EXTENT_INLINE) {
> + /* Inline extent must have 0 as key offset */
> + if (key->offset) {
> + CORRUPT("inline extent has non-zero key offset",
> + leaf, root, slot);
> + return -EUCLEAN;
> + }
> +
> + /* Compressed inline extent has no on-disk size, skip it */
> + if (btrfs_file_extent_compression(leaf, fi) !=
> + BTRFS_COMPRESS_NONE)
> + return 0;
> +
> + /* Plaintext inline extent size must match item size */
> + if (item_size != BTRFS_FILE_EXTENT_INLINE_DATA_START +
> + btrfs_file_extent_ram_bytes(leaf, fi)) {
> + CORRUPT("plaintext inline extent has invalid size",
> + leaf, root, slot);
> + return -EUCLEAN;
> + }
> + return 0;
> + }
> +
> +
> + /* regular or preallocated extent has fixed item size */
> + if (item_size != sizeof(*fi)) {
> + CORRUPT(
> + "regluar or preallocated extent data item size is invalid",
> + leaf, root, slot);
> + return -EUCLEAN;
> + }
> + if (!IS_ALIGNED(btrfs_file_extent_ram_bytes(leaf, fi), sectorsize) ||

Is this condition right? I think I've seen random numbers of ram_bytes
in the output of dump-tree so I wonder if this applies only to some
extents where the condition holds.

> + 

btrfs send -p

2017-09-25 Thread Christian Brauner
Hi guys,


It seems that btrfs v4.12.1 allows:

(1) btrfs send -p  

but disallows

(2) btrfs send  -p 

Code-wise it assumes that  is always found at optind == 1. I was
about to patch this but I'm not sure which way we'd like to go with this:
Actually only allow (1) and block (2) properly or allow both. In any case, this
seems to me like a pretty serious regression and we'd like to get this settled
soon. :)

Thanks!
Christian
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v2 0/3] Introduce comprehensive sanity check framework and

2017-09-25 Thread David Sterba
On Wed, Aug 23, 2017 at 04:57:55PM +0900, Qu Wenruo wrote:
> The patchset introduce a new framework to do more comprehensive (if not
> the most) sanity check when reading out a leaf.
> 
> The new sanity checker will include:
> 
> 1) Key order
>Existing code
> 
> 2) Item boundary
>Existing code with enhanced checker to ensure item pointer doesn't
>overlap with item itself.
> 
> 3) Key type based sanity checker
>Only EXTENT_DATA and EXTENT_CSUM checker is implemented yet.
>As each checker should go through review and tests, or it can easily
>make a valid btrfs failed to be mounted.
>So only two checkers are implemented as an example.
> 
>Existing checker like INODE_REF checker can be moved to this
>framework easily, and we can centralize all existing checkers, make
>the rest of codes more clean.
> 
> Performance wise, it's just iterating a leaf.
> And it will only get triggered when reading a leaf, cached leaf will
> not go through such checker.
> So it won't be a performance breaker.

The series looks great, I really like it. Some of the error messages
could be more verbose about what exactly went wrong, eg. what is the
value that's not expected. I'll reply to the patches with examples what
I'd expect.
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] btrfs-progs: subvolume: outputs message only when operation succeeds

2017-09-25 Thread Marat Khalili

On 25/09/17 17:33, Qu Wenruo wrote:
(Any in this case, anyone in the maillist can help review messages) 
If this is a question, I can help with assigning levels to messages. 
Although I think many levels are only required for complex daemons or 
network tools, while btrfs utils mostly perform atomic operations which 
either succeed or fail. But it's of course hard to be sure without 
seeing all actual messages, probably there's some use for 4 levels.


--

With Best Regards,
Marat Khalili
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 2/3] btrfs: cleanup btrfs_init_new_device()

2017-09-25 Thread David Sterba
On Mon, Sep 25, 2017 at 08:31:03PM +0800, Anand Jain wrote:
> Moves btrfs_abort_transaction() to the error goto and renames
> error_trans to error_sysfs. This is a preparatory patch to
> remove the BUG_ON() in btrfs_init_new_device().

Please keep all transaction abort exactly at the place where they
happened and do not merge them to one. This pattern should be used
everwhere and is important when debugging because we can pinpoint the
line in the code from the syslog message and do not have to guess which
way it got to the merged call.

This makes the binary large, looks like a code duplication in the code
but for a good reason.
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 1/3] btrfs: undo writable when sprouting fails

2017-09-25 Thread David Sterba
On Mon, Sep 25, 2017 at 08:31:02PM +0800, Anand Jain wrote:
> Signed-off-by: Anand Jain 

This does not look like change trivial enough to just leave out the
description. Please explain in more detail.
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH][v2] btrfs: change how we decide to commit transactions during flushing

2017-09-25 Thread David Sterba
On Fri, Aug 25, 2017 at 11:32:21AM +0300, Nikolay Borisov wrote:
> 
> 
> On 24.08.2017 17:43, Nikolay Borisov wrote:
> > 
> > 
> > On 22.08.2017 23:00, jo...@toxicpanda.com wrote:
> >> From: Josef Bacik 
> >>
> >> Nikolay reported that generic/273 was failing currently with ENOSPC.
> >> Turns out this is because we get to the point where the outstanding
> >> reservations are greater than the pinned space on the fs.  This is a
> >> mistake, previously we used the current reservation amount in
> >> may_commit_transaction, not the entire outstanding reservation amount.
> >> Fix this to find the minimum byte size needed to make progress in
> >> flushing, and pass that into may_commit_transaction.  From there we can
> >> make a smarter decision on whether to commit the transaction or not.
> >> This fixes the failure in generic/273.
> >>
> >> Reported-by: Nikolay Borisov 
> >> Signed-off-by: Josef Bacik 
> > 
> > Reviewed-and-tested-by: Nikolay Borisov 
> 
> 
> For this commit we might also add:
> 
> Fixes: 957780eb2788 ("Btrfs: introduce ticketed enospc infrastructure")
> Cc: sta...@vger.kernel.org # 4.8

JFI, tags added plus some additional explanation from N.
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] btrfs-progs: subvolume: outputs message only when operation succeeds

2017-09-25 Thread Qu Wenruo



On 2017年09月25日 22:19, Hugo Mills wrote:

On Mon, Sep 25, 2017 at 04:04:03PM +0800, Qu Wenruo wrote:



On 2017年09月25日 15:52, Hugo Mills wrote:

On Mon, Sep 25, 2017 at 03:46:15PM +0800, Qu Wenruo wrote:



On 2017年09月25日 15:42, Marat Khalili wrote:

On 25/09/17 10:30, Nikolay Borisov wrote:

On 19.09.2017 10:41, Misono, Tomohiro wrote:

"btrfs subvolume create/delete" outputs the message of "Create/Delete
subvolume ..." even when an operation fails.
Since it is confusing, let's outputs the message only when an
operation succeeds.

Please change the verb to past tense, more strongly signaling success -
i.e. "Created subvolume"

What about recalling some UNIX standards and returning to NOT
outputting any message when operation succeeds? My scripts are
full of grep -v calls after each btrfs command, and this sucks
(and I don't think I'm alone in this situation).


Isn't the correct way to catch the return value instead of grepping
the output?


It is, but if, for example, you're using the command in a cron
script which is expected to work, you don't want it producing output
because then you get a mail every time the script runs. So you have to
grep -v on the "success" output to make the successful script silent.


What about redirecting stdout to /dev/null and redirecting stderr to
mail if return value is not 0?
As for expected-to-work case, the stdout doesn't has much meaning
and return value should be good enough to judge the result.




If it's some command not returning value properly, would you please
report it as a bug so we can fix it.


It's not the return value that's problematic (although those used
to be a real mess). It's the fact that a successful run of the command
produces noise on stdout, which most commands don't.


Yes, a lot of tried-and-true tools don't output anything for
successful run, but also a lot of other tools do output something by
default, especially for complex tools like LVM.


btrfs sub create and btrfs sub delete, though, aren't complex.
They're about as complex as mkdir and rmdir, from a user point of
view. What's more, and like mkdir/rmdir, the effects of those commands
show up in the filesystem at the path given, so manual verification
could be as simple as "ls -d !$" or "ls !$/..". It's really, really
not necessary to have this command unconditionally print "yes, I
created a directory for you" to stdout.

Yes, there's ways to deal with it in shell scripts, but wouldn't
life be so much better if you didn't have to? Like you don't have to
filter out success reports from mkdir.


Maybe we can introduce a global --quite option to silent some output.


Or drop the spurious output unless it's asked for with --verbose.
Because then it makes it so much easier to compose tools together into
bigger and more complex things, which is, after all, one of the
fundamental things that UNIX does right.

I think we need at least 4 levels of message: debug, info, warn, error.
And only show debug and higher for --verbose.
Default to show info, and warn/error always get show (stderr).
(And during the new framework, we can add support for translation)

And we also need to reassign all existing message to new message level.

It may be time consuming but we just need to do it since there is real 
user demand here.

(Any in this case, anyone in the maillist can help review messages)

Thanks,
Qu



Hugo.


Thanks,
Qu


Hugo.

Thanks,
Qu


If you change the message a lot of scripts will have to be
changed, at least make it worth it.

  --

With Best Regards,
Marat Khaliili






--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] btrfs-progs: subvolume: outputs message only when operation succeeds

2017-09-25 Thread Hugo Mills
On Mon, Sep 25, 2017 at 04:04:03PM +0800, Qu Wenruo wrote:
> 
> 
> On 2017年09月25日 15:52, Hugo Mills wrote:
> >On Mon, Sep 25, 2017 at 03:46:15PM +0800, Qu Wenruo wrote:
> >>
> >>
> >>On 2017年09月25日 15:42, Marat Khalili wrote:
> >>>On 25/09/17 10:30, Nikolay Borisov wrote:
> On 19.09.2017 10:41, Misono, Tomohiro wrote:
> >"btrfs subvolume create/delete" outputs the message of "Create/Delete
> >subvolume ..." even when an operation fails.
> >Since it is confusing, let's outputs the message only when an
> >operation succeeds.
> Please change the verb to past tense, more strongly signaling success -
> i.e. "Created subvolume"
> >>>What about recalling some UNIX standards and returning to NOT
> >>>outputting any message when operation succeeds? My scripts are
> >>>full of grep -v calls after each btrfs command, and this sucks
> >>>(and I don't think I'm alone in this situation).
> >>
> >>Isn't the correct way to catch the return value instead of grepping
> >>the output?
> >
> >It is, but if, for example, you're using the command in a cron
> >script which is expected to work, you don't want it producing output
> >because then you get a mail every time the script runs. So you have to
> >grep -v on the "success" output to make the successful script silent.
> 
> What about redirecting stdout to /dev/null and redirecting stderr to
> mail if return value is not 0?
> As for expected-to-work case, the stdout doesn't has much meaning
> and return value should be good enough to judge the result.
> 
> >
> >>If it's some command not returning value properly, would you please
> >>report it as a bug so we can fix it.
> >
> >It's not the return value that's problematic (although those used
> >to be a real mess). It's the fact that a successful run of the command
> >produces noise on stdout, which most commands don't.
> 
> Yes, a lot of tried-and-true tools don't output anything for
> successful run, but also a lot of other tools do output something by
> default, especially for complex tools like LVM.

   btrfs sub create and btrfs sub delete, though, aren't complex.
They're about as complex as mkdir and rmdir, from a user point of
view. What's more, and like mkdir/rmdir, the effects of those commands
show up in the filesystem at the path given, so manual verification
could be as simple as "ls -d !$" or "ls !$/..". It's really, really
not necessary to have this command unconditionally print "yes, I
created a directory for you" to stdout.

   Yes, there's ways to deal with it in shell scripts, but wouldn't
life be so much better if you didn't have to? Like you don't have to
filter out success reports from mkdir.

> Maybe we can introduce a global --quite option to silent some output.

   Or drop the spurious output unless it's asked for with --verbose.
Because then it makes it so much easier to compose tools together into
bigger and more complex things, which is, after all, one of the
fundamental things that UNIX does right.

   Hugo.

> Thanks,
> Qu
> >
> >Hugo.
> >>Thanks,
> >>Qu
> >>
> >>>If you change the message a lot of scripts will have to be
> >>>changed, at least make it worth it.
> >>>
> >>>  --
> >>>
> >>>With Best Regards,
> >>>Marat Khaliili
> >>>
> >

-- 
Hugo Mills | Great films about cricket: The Fantastic Four
hugo@... carfax.org.uk |
http://carfax.org.uk/  |
PGP: E2AB1DE4  |


signature.asc
Description: Digital signature


Re: [PATCH] btrfs: Remove memory barrier from block_group_cache_done

2017-09-25 Thread David Sterba
On Wed, Aug 30, 2017 at 07:35:42PM +0300, Nikolay Borisov wrote:
> This memory barrier was introduced in 817d52f8dba2 ("Btrfs: async block group
> caching"), but even at that time it's usage was broken since it didn't pair
> with anything. There was one situation where the cached member was set to
> BTRFS_CACHE_FINISHED in a spinlock region which *might* have acted as a 
> pairing
> barrier. Since there is no clear semantics how it's supposed to work better
> to just remove it.
> 
> Signed-off-by: Nikolay Borisov 

As discussed offline, this needs a better description that removing the
barrier will not break it.
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH][v2] Btrfs: rework outstanding_extents

2017-09-25 Thread David Sterba
On Mon, Aug 21, 2017 at 10:11:20AM -0400, Josef Bacik wrote:
> Ignore this, I git format-patch'ed the wrong patch.

So if you're going to send another version, please split the tracepoints
to another patch and update the format strings to the preferred format

https://btrfs.wiki.kernel.org/index.php/Development_notes#Tracepoints
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] Btrfs: rework outstanding_extents

2017-09-25 Thread David Sterba
On Thu, Aug 24, 2017 at 05:31:15PM +0800, Lu Fengqi wrote:
> On Thu, Aug 17, 2017 at 01:01:29PM -0400, jo...@toxicpanda.com wrote:
> >From: Josef Bacik 
> >
> >Right now we do a lot of weird hoops around outstanding_extents in order
> >to keep the extent count consistent.  This is because we logically
> >transfer the outstanding_extent count from the initial reservation
> >through the set_delalloc_bits.  This makes it pretty difficult to get a
> >handle on how and when we need to mess with outstanding_extents.
> >
> >Fix this by revamping the rules of how we deal with outstanding_extents.
> >Now instead everybody that is holding on to a delalloc extent is
> >required to increase the outstanding extents count for itself.  This
> >means we'll have something like this
> >
> >btrfs_dealloc_reserve_metadata   - outstanding_extents = 1
> > btrfs_set_delalloc  - outstanding_extents = 2
> >btrfs_release_delalloc_extents   - outstanding_extents = 1
> >
> >for an initial file write.  Now take the append write where we extend an
> >existing delalloc range but still under the maximum extent size
> >
> >btrfs_delalloc_reserve_metadata - outstanding_extents = 2
> >  btrfs_set_delalloc
> >btrfs_set_bit_hook   - outstanding_extents = 3
> >btrfs_merge_bit_hook - outstanding_extents = 2
> >btrfs_release_delalloc_extents   - outstanding_extnets = 1
> >
> >In order to make the ordered extent transition we of course must now
> >make ordered extents carry their own outstanding_extent reservation, so
> >for cow_file_range we end up with
> >
> >btrfs_add_ordered_extent - outstanding_extents = 2
> >clear_extent_bit - outstanding_extents = 1
> >btrfs_remove_ordered_extent  - outstanding_extents = 0
> >
> >This makes all manipulations of outstanding_extents much more explicit.
> >Every successful call to btrfs_reserve_delalloc_metadata _must_ now be
> >combined with btrfs_release_delalloc_extents, even in the error case, as
> >that is the only function that actually modifies the
> >outstanding_extents counter.
> 
> s/btrfs_reserve_delalloc_metadata/btrfs_delalloc_reserve_metadata
> s/btrfs_release_delalloc_extents/btrfs_delalloc_release_extents
> 
> Acked-by: Lu Fengqi 

BTW,

Documentation/process/5.Posting.rst

216  - Acked-by: indicates an agreement by another developer (often a
217maintainer of the relevant code) that the patch is appropriate for
218inclusion into the kernel.

and is commonly used by maintainers when someone else modifies the
respective codebase by tree-wide, API or similar.

If you agree with the way the patch is solving a particular problem and
think it's important enough to be stated, you can write that in plain
text.
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 08/10] btrfs: remove unused parameter in cow_file_range

2017-09-25 Thread David Sterba
On Mon, Sep 25, 2017 at 03:18:31PM +0300, Nikolay Borisov wrote:
> On 21.08.2017 12:43, Nikolay Borisov wrote:
> > The delalloc_end parameter was only passed to extent_clear_unlock_delalloc, 
> > but
> > since its usage was removed form that function let's also remove it from the
> > caller's parameter list.
> > 
> > Signed-off-by: Nikolay Borisov 
> 
> ping, since most of the series has been merged but this one has been
> left out I assume it could have been missed.

https://marc.info/?l=linux-btrfs=150481210920815

Dependent change, so not merged.
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 08/10] btrfs: remove unused parameter in cow_file_range

2017-09-25 Thread Qu Wenruo



On 2017年09月25日 20:18, Nikolay Borisov wrote:



On 21.08.2017 12:43, Nikolay Borisov wrote:

The delalloc_end parameter was only passed to extent_clear_unlock_delalloc, but
since its usage was removed form that function let's also remove it from the
caller's parameter list.

Signed-off-by: Nikolay Borisov 


ping, since most of the series has been merged but this one has been
left out I assume it could have been missed.


Commit 897a41b1167955bd543bb252fd3f06f5844f2177
btrfs: extend btrfs_set_extent_delalloc and its friends to support 
in-band dedupe and subpage size patchset


That line is added to support subpage sized sectorsize.
(BTW, inband dedupe is not really using it IIRC)

It's my fault since at that time I believed subpage size patchset would 
be merged before inband dedupe, but the truth is neither is going to be 
merged soon.


I think it's better to revert that commit until we really have a 
schedule for sub page size patchset.
(BTW, according to the update internal of patchset "Allow I/O on blocks 
whose size is less than page size", it may be updated in recent month if 
it gets updated)


Thanks,
Qu



---
  fs/btrfs/inode.c | 20 +---
  1 file changed, 9 insertions(+), 11 deletions(-)

diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 98b56aca0a6f..156be9face5b 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -105,9 +105,9 @@ static int btrfs_truncate(struct inode *inode);
  static int btrfs_finish_ordered_io(struct btrfs_ordered_extent 
*ordered_extent);
  static noinline int cow_file_range(struct inode *inode,
   struct page *locked_page,
-  u64 start, u64 end, u64 delalloc_end,
-  int *page_started, unsigned long *nr_written,
-  int unlock, struct btrfs_dedupe_hash *hash);
+  u64 start, u64 end, int *page_started,
+  unsigned long *nr_written, int unlock,
+  struct btrfs_dedupe_hash *hash);
  static struct extent_map *create_io_em(struct inode *inode, u64 start, u64 
len,
   u64 orig_start, u64 block_start,
   u64 block_len, u64 orig_block_len,
@@ -739,8 +739,6 @@ static noinline void submit_compressed_extents(struct inode 
*inode,
 async_extent->start,
 async_extent->start +
 async_extent->ram_size - 1,
-async_extent->start +
-async_extent->ram_size - 1,
 _started, _written, 0,
 NULL);
  
@@ -930,9 +928,9 @@ static u64 get_extent_allocation_hint(struct inode *inode, u64 start,

   */
  static noinline int cow_file_range(struct inode *inode,
   struct page *locked_page,
-  u64 start, u64 end, u64 delalloc_end,
-  int *page_started, unsigned long *nr_written,
-  int unlock, struct btrfs_dedupe_hash *hash)
+  u64 start, u64 end, int *page_started,
+  unsigned long *nr_written, int unlock,
+  struct btrfs_dedupe_hash *hash)
  {
struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
struct btrfs_root *root = BTRFS_I(inode)->root;
@@ -1431,7 +1429,7 @@ static noinline int run_delalloc_nocow(struct inode 
*inode,
if (cow_start != (u64)-1) {
ret = cow_file_range(inode, locked_page,
 cow_start, found_key.offset - 1,
-end, page_started, nr_written, 1,
+page_started, nr_written, 1,
 NULL);
if (ret) {
if (!nolock && nocow)
@@ -1517,7 +1515,7 @@ static noinline int run_delalloc_nocow(struct inode 
*inode,
}
  
  	if (cow_start != (u64)-1) {

-   ret = cow_file_range(inode, locked_page, cow_start, end, end,
+   ret = cow_file_range(inode, locked_page, cow_start, end,
 page_started, nr_written, 1, NULL);
if (ret)
goto error;
@@ -1574,7 +1572,7 @@ static int run_delalloc_range(void *private_data, struct 
page *locked_page,
ret = run_delalloc_nocow(inode, locked_page, start, end,
 page_started, 0, nr_written);
} else if (!inode_need_compress(inode, start, end)) {
-   ret = 

Re: [PATCH 2/3] btrfs: cleanup btrfs_init_new_device()

2017-09-25 Thread Qu Wenruo



On 2017年09月25日 20:31, Anand Jain wrote:

Moves btrfs_abort_transaction() to the error goto and renames
error_trans to error_sysfs. This is a preparatory patch to
remove the BUG_ON() in btrfs_init_new_device().

Signed-off-by: Anand Jain 
---
  fs/btrfs/volumes.c | 23 +--
  1 file changed, 9 insertions(+), 14 deletions(-)

diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 9d64700cc9b6..5c34eea9d12d 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -2443,26 +2443,20 @@ int btrfs_init_new_device(struct btrfs_fs_info 
*fs_info, const char *device_path
mutex_lock(_info->chunk_mutex);
ret = init_first_rw_device(trans, fs_info);
mutex_unlock(_info->chunk_mutex);
-   if (ret) {
-   btrfs_abort_transaction(trans, ret);
-   goto error_trans;
-   }
+   if (ret)
+   goto error_sysfs;
}
  
  	ret = btrfs_add_device(trans, fs_info, device);

-   if (ret) {
-   btrfs_abort_transaction(trans, ret);
-   goto error_trans;
-   }
+   if (ret)
+   goto error_sysfs;
  
  	if (seeding_dev) {

char fsid_buf[BTRFS_UUID_UNPARSED_SIZE];
  
  		ret = btrfs_finish_sprout(trans, fs_info);

-   if (ret) {
-   btrfs_abort_transaction(trans, ret);
-   goto error_trans;
-   }
+   if (ret)
+   goto error_sysfs;
  
  		/* Sprouting would change fsid of the mounted root,

 * so rename the fsid on the sysfs
@@ -2500,12 +2494,13 @@ int btrfs_init_new_device(struct btrfs_fs_info 
*fs_info, const char *device_path
update_dev_time(device_path);
return ret;
  
-error_trans:

+error_sysfs:
+   btrfs_sysfs_rm_device_link(fs_info->fs_devices, device);
if (seeding_dev)
sb->s_flags |= MS_RDONLY;
+   btrfs_abort_transaction(trans, ret);
btrfs_end_transaction(trans);


Abort then end? Looks redundant at least.


rcu_string_free(device->name);
-   btrfs_sysfs_rm_device_link(fs_info->fs_devices, device);


Any particular reason to move this line ahead?

I didn't see it has something to do with transaction, so just curious 
about the purpose.


Thanks,
Qu

kfree(device);
  error:
blkdev_put(bdev, FMODE_EXCL);


--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 2/3] btrfs: cleanup btrfs_init_new_device()

2017-09-25 Thread Nikolay Borisov


On 25.09.2017 16:07, Nikolay Borisov wrote:
> 
> 
> On 25.09.2017 15:31, Anand Jain wrote:
>> Moves btrfs_abort_transaction() to the error goto and renames
>> error_trans to error_sysfs. This is a preparatory patch to
>> remove the BUG_ON() in btrfs_init_new_device().
>>
>> Signed-off-by: Anand Jain 
>> ---
>>  fs/btrfs/volumes.c | 23 +--
>>  1 file changed, 9 insertions(+), 14 deletions(-)
>>
>> diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
>> index 9d64700cc9b6..5c34eea9d12d 100644
>> --- a/fs/btrfs/volumes.c
>> +++ b/fs/btrfs/volumes.c
>> @@ -2443,26 +2443,20 @@ int btrfs_init_new_device(struct btrfs_fs_info 
>> *fs_info, const char *device_path
>>  mutex_lock(_info->chunk_mutex);
>>  ret = init_first_rw_device(trans, fs_info);
>>  mutex_unlock(_info->chunk_mutex);
>> -if (ret) {
>> -btrfs_abort_transaction(trans, ret);
>> -goto error_trans;
>> -}
>> +if (ret)
>> +goto error_sysfs;
>>  }
>>  
>>  ret = btrfs_add_device(trans, fs_info, device);
>> -if (ret) {
>> -btrfs_abort_transaction(trans, ret);
>> -goto error_trans;
>> -}
>> +if (ret)
>> +goto error_sysfs;
>>  
>>  if (seeding_dev) {
>>  char fsid_buf[BTRFS_UUID_UNPARSED_SIZE];
>>  
>>  ret = btrfs_finish_sprout(trans, fs_info);
>> -if (ret) {
>> -btrfs_abort_transaction(trans, ret);
>> -goto error_trans;
>> -}
>> +if (ret)
>> +goto error_sysfs;
>>  
>>  /* Sprouting would change fsid of the mounted root,
>>   * so rename the fsid on the sysfs
>> @@ -2500,12 +2494,13 @@ int btrfs_init_new_device(struct btrfs_fs_info 
>> *fs_info, const char *device_path
>>  update_dev_time(device_path);
>>  return ret;
>>  
>> -error_trans:
>> +error_sysfs:
>> +btrfs_sysfs_rm_device_link(fs_info->fs_devices, device);
> 
> Any reason why you move btrfs_sysfs_rm_device_link?
> 
>>  if (seeding_dev)
>>  sb->s_flags |= MS_RDONL> +  btrfs_abort_transaction(trans, 
>> ret);
>>  btrfs_end_transaction(trans);
>>  rcu_string_free(device->name);
>> -btrfs_sysfs_rm_device_link(fs_info->fs_devices, device);
>>  kfree(device);>  error:
>>  blkdev_put(bdev, FMODE_EXCL);
> 
> Also which branch is your code based on since in Linus' master the error
> handling (before your patch) looks a bit different. E.g. the MS_RDONL
> stuff look wrong.
> 
> error_trans:
> 
> btrfs_end_transaction(trans);
> 
> rcu_string_free(device->name);
> 
> btrfs_sysfs_rm_device_link(fs_info->fs_devices, device);
> 
> kfree(device);
> 
> error:
> 
> blkdev_put(bdev, FMODE_EXCL);
> 
> if (seeding_dev) {
> 
> mutex_unlock(_mutex);
> 
> up_write(>s_umount);
> 
> }
> 
> return ret;

Ok, turned out I hadn't received 1/3 so this answers this question. The
sysfs rm stands though


> 
>>
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 2/3] btrfs: cleanup btrfs_init_new_device()

2017-09-25 Thread Nikolay Borisov


On 25.09.2017 15:31, Anand Jain wrote:
> Moves btrfs_abort_transaction() to the error goto and renames
> error_trans to error_sysfs. This is a preparatory patch to
> remove the BUG_ON() in btrfs_init_new_device().
> 
> Signed-off-by: Anand Jain 
> ---
>  fs/btrfs/volumes.c | 23 +--
>  1 file changed, 9 insertions(+), 14 deletions(-)
> 
> diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
> index 9d64700cc9b6..5c34eea9d12d 100644
> --- a/fs/btrfs/volumes.c
> +++ b/fs/btrfs/volumes.c
> @@ -2443,26 +2443,20 @@ int btrfs_init_new_device(struct btrfs_fs_info 
> *fs_info, const char *device_path
>   mutex_lock(_info->chunk_mutex);
>   ret = init_first_rw_device(trans, fs_info);
>   mutex_unlock(_info->chunk_mutex);
> - if (ret) {
> - btrfs_abort_transaction(trans, ret);
> - goto error_trans;
> - }
> + if (ret)
> + goto error_sysfs;
>   }
>  
>   ret = btrfs_add_device(trans, fs_info, device);
> - if (ret) {
> - btrfs_abort_transaction(trans, ret);
> - goto error_trans;
> - }
> + if (ret)
> + goto error_sysfs;
>  
>   if (seeding_dev) {
>   char fsid_buf[BTRFS_UUID_UNPARSED_SIZE];
>  
>   ret = btrfs_finish_sprout(trans, fs_info);
> - if (ret) {
> - btrfs_abort_transaction(trans, ret);
> - goto error_trans;
> - }
> + if (ret)
> + goto error_sysfs;
>  
>   /* Sprouting would change fsid of the mounted root,
>* so rename the fsid on the sysfs
> @@ -2500,12 +2494,13 @@ int btrfs_init_new_device(struct btrfs_fs_info 
> *fs_info, const char *device_path
>   update_dev_time(device_path);
>   return ret;
>  
> -error_trans:
> +error_sysfs:
> + btrfs_sysfs_rm_device_link(fs_info->fs_devices, device);

Any reason why you move btrfs_sysfs_rm_device_link?

>   if (seeding_dev)
>   sb->s_flags |= MS_RDONL> +  btrfs_abort_transaction(trans, 
> ret);
>   btrfs_end_transaction(trans);
>   rcu_string_free(device->name);
> - btrfs_sysfs_rm_device_link(fs_info->fs_devices, device);
>   kfree(device);>  error:
>   blkdev_put(bdev, FMODE_EXCL);

Also which branch is your code based on since in Linus' master the error
handling (before your patch) looks a bit different. E.g. the MS_RDONL
stuff look wrong.

error_trans:

btrfs_end_transaction(trans);

rcu_string_free(device->name);

btrfs_sysfs_rm_device_link(fs_info->fs_devices, device);

kfree(device);

error:

blkdev_put(bdev, FMODE_EXCL);

if (seeding_dev) {

mutex_unlock(_mutex);

up_write(>s_umount);

}

return ret;

> 
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 1/3] btrfs: undo writable when sprouting fails

2017-09-25 Thread Qu Wenruo



On 2017年09月25日 20:31, Anand Jain wrote:

Signed-off-by: Anand Jain 


The error_trans tag needs to revert the RDONLY flag without doubt.


But just a small question.

Just lines before error_trans tag, we're committing transaction and then 
relocating system chunks.

In which we don't use goto error branch but returning directly.

Don't we need to to revert the RDONLY flag in that error handler?

Thanks,
Qu


---
  fs/btrfs/volumes.c | 2 ++
  1 file changed, 2 insertions(+)

diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 0e8f16c305df..9d64700cc9b6 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -2501,6 +2501,8 @@ int btrfs_init_new_device(struct btrfs_fs_info *fs_info, 
const char *device_path
return ret;
  
  error_trans:

+   if (seeding_dev)
+   sb->s_flags |= MS_RDONLY;
btrfs_end_transaction(trans);
rcu_string_free(device->name);
btrfs_sysfs_rm_device_link(fs_info->fs_devices, device);


--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Wiki updates

2017-09-25 Thread David Sterba
Hi,

I've updated the Status page and synced the manual pages from
btrfs-progs git.

The status of various features is split to one more column, Performance.
There's a section for each 'mostly OK' status below the table and linked
from the note. This will hopefully be more understandable than just a
single color table cell.

Please note that the page is locked, so either drop me a mail or use the
talk page to suggest edits.
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 2/3] btrfs: cleanup btrfs_init_new_device()

2017-09-25 Thread Anand Jain
Moves btrfs_abort_transaction() to the error goto and renames
error_trans to error_sysfs. This is a preparatory patch to
remove the BUG_ON() in btrfs_init_new_device().

Signed-off-by: Anand Jain 
---
 fs/btrfs/volumes.c | 23 +--
 1 file changed, 9 insertions(+), 14 deletions(-)

diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 9d64700cc9b6..5c34eea9d12d 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -2443,26 +2443,20 @@ int btrfs_init_new_device(struct btrfs_fs_info 
*fs_info, const char *device_path
mutex_lock(_info->chunk_mutex);
ret = init_first_rw_device(trans, fs_info);
mutex_unlock(_info->chunk_mutex);
-   if (ret) {
-   btrfs_abort_transaction(trans, ret);
-   goto error_trans;
-   }
+   if (ret)
+   goto error_sysfs;
}
 
ret = btrfs_add_device(trans, fs_info, device);
-   if (ret) {
-   btrfs_abort_transaction(trans, ret);
-   goto error_trans;
-   }
+   if (ret)
+   goto error_sysfs;
 
if (seeding_dev) {
char fsid_buf[BTRFS_UUID_UNPARSED_SIZE];
 
ret = btrfs_finish_sprout(trans, fs_info);
-   if (ret) {
-   btrfs_abort_transaction(trans, ret);
-   goto error_trans;
-   }
+   if (ret)
+   goto error_sysfs;
 
/* Sprouting would change fsid of the mounted root,
 * so rename the fsid on the sysfs
@@ -2500,12 +2494,13 @@ int btrfs_init_new_device(struct btrfs_fs_info 
*fs_info, const char *device_path
update_dev_time(device_path);
return ret;
 
-error_trans:
+error_sysfs:
+   btrfs_sysfs_rm_device_link(fs_info->fs_devices, device);
if (seeding_dev)
sb->s_flags |= MS_RDONLY;
+   btrfs_abort_transaction(trans, ret);
btrfs_end_transaction(trans);
rcu_string_free(device->name);
-   btrfs_sysfs_rm_device_link(fs_info->fs_devices, device);
kfree(device);
 error:
blkdev_put(bdev, FMODE_EXCL);
-- 
2.13.1

--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 3/3] btrfs: fix BUG_ON in btrfs_init_new_device()

2017-09-25 Thread Anand Jain
Here we will simply return the error to the caller. And handle the
fail condition by calling the abort transaction through the error
path.

Signed-off-by: Anand Jain 
---
 fs/btrfs/volumes.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 5c34eea9d12d..93eaf314fe72 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -2399,7 +2399,8 @@ int btrfs_init_new_device(struct btrfs_fs_info *fs_info, 
const char *device_path
if (seeding_dev) {
sb->s_flags &= ~MS_RDONLY;
ret = btrfs_prepare_sprout(fs_info);
-   BUG_ON(ret); /* -ENOMEM */
+   if (ret)
+   goto error_trans;
}
 
device->fs_devices = fs_info->fs_devices;
@@ -2496,6 +2497,7 @@ int btrfs_init_new_device(struct btrfs_fs_info *fs_info, 
const char *device_path
 
 error_sysfs:
btrfs_sysfs_rm_device_link(fs_info->fs_devices, device);
+error_trans:
if (seeding_dev)
sb->s_flags |= MS_RDONLY;
btrfs_abort_transaction(trans, ret);
-- 
2.13.1

--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 0/3] fix bug in btrfs_init_new_device()

2017-09-25 Thread Anand Jain
1/3 fixes a bug which failed to reset writable when sprouting failed
2/3 is cleanup and preparatory to fix 3/3
3/3 fixes BUG_ON in btrfs_init_new_device()

Anand Jain (3):
  btrfs: undo writable when sprouting fails
  btrfs: cleanup btrfs_init_new_device()
  btrfs: fix BUG_ON in btrfs_init_new_device()

 fs/btrfs/volumes.c | 27 +--
 1 file changed, 13 insertions(+), 14 deletions(-)

-- 
2.13.1

--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 1/3] btrfs: undo writable when sprouting fails

2017-09-25 Thread Anand Jain
Signed-off-by: Anand Jain 
---
 fs/btrfs/volumes.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 0e8f16c305df..9d64700cc9b6 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -2501,6 +2501,8 @@ int btrfs_init_new_device(struct btrfs_fs_info *fs_info, 
const char *device_path
return ret;
 
 error_trans:
+   if (seeding_dev)
+   sb->s_flags |= MS_RDONLY;
btrfs_end_transaction(trans);
rcu_string_free(device->name);
btrfs_sysfs_rm_device_link(fs_info->fs_devices, device);
-- 
2.13.1

--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 08/10] btrfs: remove unused parameter in cow_file_range

2017-09-25 Thread Nikolay Borisov


On 21.08.2017 12:43, Nikolay Borisov wrote:
> The delalloc_end parameter was only passed to extent_clear_unlock_delalloc, 
> but
> since its usage was removed form that function let's also remove it from the
> caller's parameter list.
> 
> Signed-off-by: Nikolay Borisov 

ping, since most of the series has been merged but this one has been
left out I assume it could have been missed.

> ---
>  fs/btrfs/inode.c | 20 +---
>  1 file changed, 9 insertions(+), 11 deletions(-)
> 
> diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
> index 98b56aca0a6f..156be9face5b 100644
> --- a/fs/btrfs/inode.c
> +++ b/fs/btrfs/inode.c
> @@ -105,9 +105,9 @@ static int btrfs_truncate(struct inode *inode);
>  static int btrfs_finish_ordered_io(struct btrfs_ordered_extent 
> *ordered_extent);
>  static noinline int cow_file_range(struct inode *inode,
>  struct page *locked_page,
> -u64 start, u64 end, u64 delalloc_end,
> -int *page_started, unsigned long *nr_written,
> -int unlock, struct btrfs_dedupe_hash *hash);
> +u64 start, u64 end, int *page_started,
> +unsigned long *nr_written, int unlock,
> +struct btrfs_dedupe_hash *hash);
>  static struct extent_map *create_io_em(struct inode *inode, u64 start, u64 
> len,
>  u64 orig_start, u64 block_start,
>  u64 block_len, u64 orig_block_len,
> @@ -739,8 +739,6 @@ static noinline void submit_compressed_extents(struct 
> inode *inode,
>async_extent->start,
>async_extent->start +
>async_extent->ram_size - 1,
> -  async_extent->start +
> -  async_extent->ram_size - 1,
>_started, _written, 0,
>NULL);
>  
> @@ -930,9 +928,9 @@ static u64 get_extent_allocation_hint(struct inode 
> *inode, u64 start,
>   */
>  static noinline int cow_file_range(struct inode *inode,
>  struct page *locked_page,
> -u64 start, u64 end, u64 delalloc_end,
> -int *page_started, unsigned long *nr_written,
> -int unlock, struct btrfs_dedupe_hash *hash)
> +u64 start, u64 end, int *page_started,
> +unsigned long *nr_written, int unlock,
> +struct btrfs_dedupe_hash *hash)
>  {
>   struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
>   struct btrfs_root *root = BTRFS_I(inode)->root;
> @@ -1431,7 +1429,7 @@ static noinline int run_delalloc_nocow(struct inode 
> *inode,
>   if (cow_start != (u64)-1) {
>   ret = cow_file_range(inode, locked_page,
>cow_start, found_key.offset - 1,
> -  end, page_started, nr_written, 1,
> +  page_started, nr_written, 1,
>NULL);
>   if (ret) {
>   if (!nolock && nocow)
> @@ -1517,7 +1515,7 @@ static noinline int run_delalloc_nocow(struct inode 
> *inode,
>   }
>  
>   if (cow_start != (u64)-1) {
> - ret = cow_file_range(inode, locked_page, cow_start, end, end,
> + ret = cow_file_range(inode, locked_page, cow_start, end,
>page_started, nr_written, 1, NULL);
>   if (ret)
>   goto error;
> @@ -1574,7 +1572,7 @@ static int run_delalloc_range(void *private_data, 
> struct page *locked_page,
>   ret = run_delalloc_nocow(inode, locked_page, start, end,
>page_started, 0, nr_written);
>   } else if (!inode_need_compress(inode, start, end)) {
> - ret = cow_file_range(inode, locked_page, start, end, end,
> + ret = cow_file_range(inode, locked_page, start, end,
> page_started, nr_written, 1, NULL);
>   } else {
>   set_bit(BTRFS_INODE_HAS_ASYNC_EXTENT,
> 
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] Btrfs: send, apply asynchronous page cache readahead to enhance page read

2017-09-25 Thread Nikolay Borisov


On 15.09.2017 11:47, peterh wrote:
> From: Kuanling Huang 
> 
> By analyzing the perf on btrfs send, we found it take large
> amount of cpu time on page_cache_sync_readahead. This effort
> can be reduced after switching to asynchronous one. Overall
> performance gain on HDD and SSD were 9 and 15 percent if
> simply send a large file.
> 
> Signed-off-by: Kuanling Huang 

Reviewed-by: Nikolay Borisov 

> ---
>  fs/btrfs/send.c | 21 -
>  1 file changed, 16 insertions(+), 5 deletions(-)
> 
> diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c
> index 63a6152..7a5eb66 100644
> --- a/fs/btrfs/send.c
> +++ b/fs/btrfs/send.c
> @@ -4475,16 +4475,27 @@ static ssize_t fill_read_buf(struct send_ctx *sctx, 
> u64 offset, u32 len)
>   /* initial readahead */
>   memset(>ra, 0, sizeof(struct file_ra_state));
>   file_ra_state_init(>ra, inode->i_mapping);
> - btrfs_force_ra(inode->i_mapping, >ra, NULL, index,
> -last_index - index + 1);
>  
>   while (index <= last_index) {
>   unsigned cur_len = min_t(unsigned, len,
>PAGE_CACHE_SIZE - pg_offset);
> - page = find_or_create_page(inode->i_mapping, index, GFP_NOFS);
> + page = find_lock_page(inode->i_mapping, index);
>   if (!page) {
> - ret = -ENOMEM;
> - break;
> + page_cache_sync_readahead(inode->i_mapping,
> + >ra, NULL, index,
> + last_index + 1 - index);
> +
> + page = find_or_create_page(inode->i_mapping, index, 
> GFP_KERNEL);
> + if (!page) {
> + ret = -ENOMEM;
> + break;
> + }
> + }
> +
> + if (PageReadahead(page)) {
> + page_cache_async_readahead(inode->i_mapping,
> + >ra, NULL, page, index,
> + last_index + 1 - index);
>   }
>  
>   if (!PageUptodate(page)) {
> 
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v3 07/14] btrfs-progs: Doc/mkfs: Add extra condition for rootdir option

2017-09-25 Thread Austin S. Hemmelgarn

On 2017-09-22 11:07, Qu Wenruo wrote:



On 2017年09月22日 21:33, Austin S. Hemmelgarn wrote:

On 2017-09-22 08:32, Qu Wenruo wrote:

On 2017年09月22日 19:38, Austin S. Hemmelgarn wrote:

On 2017-09-22 06:39, Qu Wenruo wrote:
As I already stated in an other thread, if you want to shrink, do 
it in another command line tool.
Do one thing and do it simple. (Although Btrfs itself is already 
out of the UNIX way)
Unless I'm reading the code wrong, the shrinking isn't happening in 
a second pass, so this _is_ doing one thing, and it appears to be 
doing it as simply as possible (although arguably not correctly 
because of the 1MB reserved area being used).


If you're referring to my V1 implementation of shrink, that's doing 
*one* thing.


But the original shrinking code? Nope, or we won't have the custom 
chunk allocator at all.


What I really mean is, if one wants to shrink, at least don't couple 
the shrink code into "mkfs.btrfs".


Do shrink in its own tool/subcommand, not in a really unrelated tool.

There are two cases for shrinking a filesystem:
1. You're resizing it to move to a smaller disk (or speed up copying 
to another disk).
2. You're generating a filesystem image that needs to be as small as 
possible.


I would argue there is no meaning of creating *smallest* image. (Of 
course it exists).
There is an exact meaning given the on-disk layout.  It's an image whose 
size is equal to the sum of:

1. 1MB (for the reserved space at the beginning).
2. However many superblocks it should have given the size.
3. The total amount of file data and extended attribute data to be 
included, rounding up for block size
4. The exact amount of metadata space needed to represent the tree from 
3, also rounding up for block size.
5. The exact amount of system chunk space needed to handle 3 and 4, plus 
enough room to allocate at least one more chunk of each type (to 
ultimately allow for resizing the filesystem if desired).

6. Exactly enough reserved metadata space to resize the FS.


We could put tons of code to implement, and more (or less) test cases to 
verify it.


But the demand doesn't validate the effort.
And how much effort has been put into ripping this out completely 
together with the other fixes?  How much more would it have been to just 
move it to another option and fix the reserved area usage?


All my points are clear for this patchset:
I know I removed one function, and my reason is:
1) No or little usage
    And it's anti intuition.
So split it to a separate tool (mkimage maybe?), and fix mkfs to behave 
sensibly.  I absolutely agree on the fact that it's non-intuitive.  It 
should either be it's own option (with a dependency on -r being passed 
of course), or a separate tool if you're so worried about mkfs being too 
complex.


As to usage, given the current data, there is no proof that I'm the only 
one using it, but there is also no proof that anybody other than me is 
using it, which means that you can't reasonably base an argument on 
actual usage of this option, since you can't prove anything about usage. 
 All you know is that you have one person who uses it, and one who was 
confused by it (but appears to want to use it in a different way).  It's 
a niche use case though, and when dealing with something like this, 
there is a threshold of usage below which you won't see much in the way 
of discussion of the option on the list, since only a reasonably small 
percentage of BTRFS users are actually subscribed.



2) Dead code (not tested nor well documented)
It _IS NOT_ dead code.  It is absolutely reachable from code 
external to itself.  It's potentially unused code, but that is not the 
same thing as dead code.


That aside, I can fix up the documentation, and I've actually tested it 
reasonably thoroughly (I use it every month or so when I update stuff I 
have using seed devices, and it also gets used by my testing 
infrastructure when generating images pre-loaded with files for tests to 
save time).  I'll agree it hasn't been rigorously tested, but it does 
appear to work as (not) advertised, even when used in odd ways.



3) Possible workaround
There are three options for workarounds, and both of them are sub-par to 
this even aside from the reduced simplicity it offers to userspace:
1. Resize after mkfs.  This is impractical both because there is no 
offline resize (having to mount the FS RW prior to use as a seed device 
means that you don't have a guaranteed reproducible image, which is a 
pretty common request for container usage there days), and it will end 
up with wasted space (the smallest possible filesystem created through a 
resize is consistently larger (by more than 1MB) than what the -r option 
to mkfs generates).
2. Use a binary search to determine the smallest size to a reasonable 
margin.  This is impractical simply because it takes too long, and again 
can't reliably get the smallest possible image.
3. Attempt to compute the smallest possible image without using a binary 

Re: [PATCH v3 07/14] btrfs-progs: Doc/mkfs: Add extra condition for rootdir option

2017-09-25 Thread Austin S. Hemmelgarn

On 2017-09-24 10:08, Goffredo Baroncelli wrote:

On 09/24/2017 12:10 PM, Anand Jain wrote:




All my points are clear for this patchset:
I know I removed one function, and my reason is:
1) No or little usage
     And it's anti intuition.
2) Dead code (not tested nor well documented)
3) Possible workaround

I can add several extra reasons as I stated before, but number of reasons won't 
validate anything anyway.


  End user convenience wins over the developer's technical difficulties.


Sorry, but I have to agree with Qu; there is no a big demand (other than Austin) for this 
functionality; even you stated that "...at some point it may..."; until now the 
UI is quite unfriendly (we should use a big enough device, and then cut it by hand on the 
basis of the output of mkfs.btrfs)...
I will comment that I agree that it should not be the default.  It's not 
intuitive for most people, and as you say it's a niche feature.  Just 
removing it completely though with the above argument sounds very much 
like trying to meet quotas for coding.


I fear that this is another feature which increase the complexity of btrfs (and 
tools) with little or none usage
On average?  It only increases the complexity of mkfs once there's a fix 
for the (theoretically trivial to fix) issue with it ignoring the fact 
that the first 1MB of space is supposed to be untouched by BTRFS.


The work of Qu is a nice cleanup; I hope that this will be the direction which BTRFS will 
takes: removing of "strange/unused" features improving the reliability of the 
others.
The two are not inherently interdependent, and that argument doesn't 
exactly hold up to scrutiny considering that mkfs is already perfectly 
reliable even with this feature, and it does not compromise the 
reliability of other features (again, once you fix the usage of the 
reserved area at the beginning of the image).

--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: btrfs scrub crashes OS

2017-09-25 Thread Qu Wenruo



On 2017年09月25日 17:50, Lukas Pirl wrote:

Dear all,

I experience reproducible OS crashes when scrubbing a btrfs file system.
Apart from that, the file system mounts rw and is usable without any
problems (including modifying snapshots and all that).

When the system crashes (i.e., freezes), there are no errors printed to
the system logs or via `dmesg` (had a display connected).


Even no dmesg output using tty or netconsole?

That's strange.
Normally it should be kernel BUG_ON() to cause such problem.

And if the system is still responsible (either from TTY or ssh), is 
there anything strange like tons of IO or CPU usage?




Recovery is only possible via power-cycling the machine.

The host experienced a lot of crashes and ATA errors due to hardware
failures in the past.
To the best of my knowledge, the hardware is stable now.

`btrfs device stats` outputs zeros for all counters.

`btrfsck --readonly --mode lowmem` outputs a bunch of
   referencer count mismatch …
and
   ERROR: data extent[… …] backref lost
see https://pastebin.com/seC4fReP for the full log.


There is a known bug for lowmem mode to report such false alert.

Btrfs-progs v4.13 should have fixed it.

As long as v4.13 btrfs check reports no error, its metadata should be good.



System info:
btrfs RAID 1 (~1.5 years old), 7 SATA HDDs


Oh, RAID1, so normal "btrfs check --check-data-csum" can't really check 
all data checksum. (It will pass 2nd mirror if 1st one matches the csum)


You could try the out-of-tree offline scrub to do a full scrub of your 
fs unmounted, so it won't crash your system (if nothing wrong happened)

https://github.com/gujx2017/btrfs-progs/tree/offline_scrub


   MIXED_BACKREF, BIG_METADATA, EXTENDED_IREF, SKINNY_METADATA, NO_HOLES


Only NO_HOLES is not ordinary, but shouldn't cause a problem.


Without kernel backtrace, it's tricky to locate the problem.
So I would recommend to use netconsole (IIRC more reliable, as I use it 
on my test VM to capture the dying message) or TTY output to verify 
there is no kernel message/backtrace.


Thanks,
Qu


   no quotas in use
   see also https://pastebin.com/4me6zDsN for more details
btrfs-progs v4.12
GNU/Linux 4.12.0-0.bpo.1-amd64 #1 SMP Debian 4.12.6-1~bpo9+1 x86_64

The question, obviously, is how can I make this fs "scrubable" again?
Are the errors found by btrfsck safe to repair using btrfsck or some
other tool?

Thank you so much in advance,

Lukas
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


btrfs scrub crashes OS

2017-09-25 Thread Lukas Pirl
Dear all,

I experience reproducible OS crashes when scrubbing a btrfs file system.
Apart from that, the file system mounts rw and is usable without any
problems (including modifying snapshots and all that).

When the system crashes (i.e., freezes), there are no errors printed to
the system logs or via `dmesg` (had a display connected).

Recovery is only possible via power-cycling the machine.

The host experienced a lot of crashes and ATA errors due to hardware
failures in the past.
To the best of my knowledge, the hardware is stable now.

`btrfs device stats` outputs zeros for all counters.

`btrfsck --readonly --mode lowmem` outputs a bunch of
  referencer count mismatch …
and
  ERROR: data extent[… …] backref lost
see https://pastebin.com/seC4fReP for the full log.

System info:
btrfs RAID 1 (~1.5 years old), 7 SATA HDDs
  MIXED_BACKREF, BIG_METADATA, EXTENDED_IREF, SKINNY_METADATA, NO_HOLES
  no quotas in use
  see also https://pastebin.com/4me6zDsN for more details
btrfs-progs v4.12
GNU/Linux 4.12.0-0.bpo.1-amd64 #1 SMP Debian 4.12.6-1~bpo9+1 x86_64

The question, obviously, is how can I make this fs "scrubable" again?
Are the errors found by btrfsck safe to repair using btrfsck or some
other tool?

Thank you so much in advance,

Lukas
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] btrfs-progs: check: check invalid extent_inline_ref type in lowmem

2017-09-25 Thread Su Yue



On 09/24/2017 10:17 PM, Nikolay Borisov wrote:



On 19.09.2017 13:00, Qu Wenruo wrote:



On 2017年09月19日 17:48, Su Yue wrote:



On 09/19/2017 04:48 PM, Qu Wenruo wrote:



On 2017年09月19日 16:32, Su Yue wrote:

Lowmem check does not skip invalid type in extent_inline_ref then
calls btrfs_extent_inline_ref_size(type) which causes crash.

Example:
$ ./btrfs-corrupt-block -e -l 20971520 /tmp/data_small
corrupting extent record: key 20971520 169 0
$ btrfs check --mode=lowmem  /tmp/data_small
Checking filesystem on /tmp/data_small
UUID: ee205d69-8724-4aa2-a4f5-bc8558a62169
checking extents
ERROR: extent[20971520 16384] backref type mismatch, missing bit: 2
ERROR: extent[20971520 16384] backref generation mismatch,
wanted: 7, have: 0
ERROR: extent[20971520 16384] is referred by other roots than 3
ctree.h:1754: btrfs_extent_inline_ref_size: BUG_ON `1` triggered,
value 1
btrfs(+0x543db)[0x55fabc2ab3db]
btrfs(+0x587f7)[0x55fabc2af7f7]
btrfs(+0x5fa44)[0x55fabc2b6a44]
btrfs(cmd_check+0x194a)[0x55fabc2bd717]
btrfs(main+0x88)[0x55fabc2682e0]
/usr/lib/libc.so.6(__libc_start_main+0xea)[0x7f021c3824ca]
btrfs(_start+0x2a)[0x55fabc267e7a]
[1]    5188 abort (core dumped)  btrfs check --mode=lowmem
/tmp/data_small

Fix it by checking type before obtaining inline_ref size.

Signed-off-by: Su Yue 


Code itself looks good.
And test case please.


Thanks for review. I'll add test case in next version.


Reviewed-by: Qu Wenruo 

However, such whac-a-mole fix will finally be a nightmare to maintain.

What about integrating all of such validation checkers into one place?
So fsck part will only need to check their cross reference without
bothering such corruption.


I was confused how to fix the bug(new checker or little changes
in this patch).
The reason why I fix it in this way is that most callers do
check type before calling btrfs_extent_inline_ref_size().

Since you prefer the former, I'm going to do it.


Current version looks good enough as a fix.>
Just saying we'd better using an integrated solution later.


I agree with you that we should have an integrated solution but frankly
I'd rather see it sooner rather than later, because history has shown
that if something is not done when it's first talked about it usually
gets bogged down by other work.  With this in mind, Su, might I ask you
that you repost the patch but with the centralised idea of handling the
validation lifted from Qu's kernel side patches.


OK. Thanks for your suggestion:).
Most of work has been finished. But I found the original mode can
not handle the new test case correctly. I'm Working on it.

Thanks,
Su


Thanks,
Qu


Thanks
Su


Just like the kernel patch I submitted some times ago.
https://www.spinics.net/lists/linux-btrfs/msg68498.html

Thanks,
Qu


---
   cmds-check.c | 9 +
   1 file changed, 9 insertions(+)

diff --git a/cmds-check.c b/cmds-check.c
index 4e0b0fe4..74c10c75 100644
--- a/cmds-check.c
+++ b/cmds-check.c
@@ -11565,6 +11565,10 @@ static int check_tree_block_ref(struct
btrfs_root *root,
   offset, level + 1, owner,
   NULL);
   }
+    } else {
+    error("extent[%llu %u %llu] has unknown ref type: %d",
+  key.objectid, key.type, key.offset, type);
+    break;
   }
   if (found_ref)
@@ -11831,6 +11835,11 @@ static int check_extent_data_item(struct
btrfs_root *root,
   found_dbackref = !check_tree_block_ref(root, NULL,
   btrfs_extent_inline_ref_offset(leaf, iref),
   0, owner, NULL);
+    } else {
+    error("extent[%llu %u %llu] has unknown ref type: %d",
+  disk_bytenr, BTRFS_EXTENT_DATA_KEY,
+  disk_num_bytes, type);
+    break;
   }
   if (found_dbackref)








--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html







--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] btrfs-progs: subvolume: outputs message only when operation succeeds

2017-09-25 Thread Marat Khalili

On 25/09/17 11:08, Qu Wenruo wrote:
What about redirecting stdout to /dev/null and redirecting stderr to 
mail if return value is not 0?

s/if return value is not 0/if return value is 0/.

The main point is, if btrfs returns 0, then nothing to worry about.
(Unless there is a bug, even in that case keep an eye on stderr should 
be enough to catch that)


Redirection to /dev/null will work. However,

1) It will always looks suspicious. grep -v with expected message is at 
least clear about its intent and consequences.


2) Although shorter than grep -v, it will still take space in shell 
scripts and force one to remember btrfs commands one has to add it 
after. This is already inconvenient enough to want a fix.


--

With Best Regards,
Marat Khalili
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] btrfs-progs: subvolume: outputs message only when operation succeeds

2017-09-25 Thread Marat Khalili

On 25/09/17 10:52, Hugo Mills wrote:


Isn't the correct way to catch the return value instead of grepping
the output?
It is, but if, for example, you're using the command in a cron
script which is expected to work, you don't want it producing output
because then you get a mail every time the script runs. So you have to
grep -v on the "success" output to make the successful script silent.


If it's some command not returning value properly, would you please
report it as a bug so we can fix it.

It's not the return value that's problematic (although those used
to be a real mess). It's the fact that a successful run of the command
produces noise on stdout, which most commands don't.
Yes, exactly: cron, mail -E and just long scripts where btrfs operations 
are small steps here and there.


(On top of this, actually catching the return value from the right 
command before `| grep -v` with errexit and pipefail on is so difficult 
that I usually end up rewriting whole mess in Python. Which would be 
nice result in itself if it didn't take a whole day in place of one 
minute for bash line.)


--

With Best Regards,
Marat Khalili

--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: Btrfs performance with small blocksize on SSD

2017-09-25 Thread Kai Krakow
Am Mon, 25 Sep 2017 07:04:14 +
schrieb "Fuhrmann, Carsten" :

> Well the correct translation for "Laufzeit" is runtime and not
> latency. But thank you for that hint, I'll change it to "gesamt
> Laufzeit" to make it more clear.

How about better translating it to English in the first place as you're
trying to reach an international community?

Also, it would be nice to put the exact test you did as a command line
or configuration file, so it can be replayed on other systems, and -
most important - by the developers, to easily uncover what is causing
the behavior...


> Best regards
> 
> Carsten
> 
> -Ursprüngliche Nachricht-
> Von: Andrei Borzenkov [mailto:arvidj...@gmail.com] 
> Gesendet: Sonntag, 24. September 2017 18:43
> An: Fuhrmann, Carsten ; Qu Wenruo
> ; linux-btrfs@vger.kernel.org Betreff: Re:
> AW: Btrfs performance with small blocksize on SSD
> 
> 24.09.2017 16:53, Fuhrmann, Carsten пишет:
> > Hello,
> > 
> > 1)
> > I used direct write (no page cache) but I didn't disable the Disk
> > cache of the HDD/SSD itself. In all tests I wrote 1GB and looked
> > for the runtime of that write process.  
> 
> So "latency" on your diagram means total time to write 1GiB file?
> That is highly unusual meaning for "latency" which normally means
> time to perform single IO. If so, you should better rename Y-axis to
> something like "total run time".
> N_r__yb_Xv_^_)__{.n_+{_n___)w*jg______j/___z_2___&_)___a_____G___h__j:+v___w


-- 
Regards,
Kai

Replies to list-only preferred.


--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] btrfs-progs: subvolume: outputs message only when operation succeeds

2017-09-25 Thread Qu Wenruo



On 2017年09月25日 16:04, Qu Wenruo wrote:



On 2017年09月25日 15:52, Hugo Mills wrote:

On Mon, Sep 25, 2017 at 03:46:15PM +0800, Qu Wenruo wrote:



On 2017年09月25日 15:42, Marat Khalili wrote:

On 25/09/17 10:30, Nikolay Borisov wrote:

On 19.09.2017 10:41, Misono, Tomohiro wrote:

"btrfs subvolume create/delete" outputs the message of "Create/Delete
subvolume ..." even when an operation fails.
Since it is confusing, let's outputs the message only when an
operation succeeds.
Please change the verb to past tense, more strongly signaling 
success -

i.e. "Created subvolume"

What about recalling some UNIX standards and returning to NOT
outputting any message when operation succeeds? My scripts are
full of grep -v calls after each btrfs command, and this sucks
(and I don't think I'm alone in this situation).


Isn't the correct way to catch the return value instead of grepping
the output?


    It is, but if, for example, you're using the command in a cron
script which is expected to work, you don't want it producing output
because then you get a mail every time the script runs. So you have to
grep -v on the "success" output to make the successful script silent.


What about redirecting stdout to /dev/null and redirecting stderr to 
mail if return value is not 0?

s/if return value is not 0/if return value is 0/.

The main point is, if btrfs returns 0, then nothing to worry about.
(Unless there is a bug, even in that case keep an eye on stderr should 
be enough to catch that)


Thanks,
Qu
As for expected-to-work case, the stdout doesn't has much meaning and 
return value should be good enough to judge the result.





If it's some command not returning value properly, would you please
report it as a bug so we can fix it.


    It's not the return value that's problematic (although those used
to be a real mess). It's the fact that a successful run of the command
produces noise on stdout, which most commands don't.


Yes, a lot of tried-and-true tools don't output anything for successful 
run, but also a lot of other tools do output something by default, 
especially for complex tools like LVM.


Maybe we can introduce a global --quite option to silent some output.

Thanks,
Qu


    Hugo.

Thanks,
Qu


If you change the message a lot of scripts will have to be
changed, at least make it worth it.

  --

With Best Regards,
Marat Khaliili




--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] btrfs-progs: subvolume: outputs message only when operation succeeds

2017-09-25 Thread Qu Wenruo



On 2017年09月25日 15:52, Hugo Mills wrote:

On Mon, Sep 25, 2017 at 03:46:15PM +0800, Qu Wenruo wrote:



On 2017年09月25日 15:42, Marat Khalili wrote:

On 25/09/17 10:30, Nikolay Borisov wrote:

On 19.09.2017 10:41, Misono, Tomohiro wrote:

"btrfs subvolume create/delete" outputs the message of "Create/Delete
subvolume ..." even when an operation fails.
Since it is confusing, let's outputs the message only when an
operation succeeds.

Please change the verb to past tense, more strongly signaling success -
i.e. "Created subvolume"

What about recalling some UNIX standards and returning to NOT
outputting any message when operation succeeds? My scripts are
full of grep -v calls after each btrfs command, and this sucks
(and I don't think I'm alone in this situation).


Isn't the correct way to catch the return value instead of grepping
the output?


It is, but if, for example, you're using the command in a cron
script which is expected to work, you don't want it producing output
because then you get a mail every time the script runs. So you have to
grep -v on the "success" output to make the successful script silent.


What about redirecting stdout to /dev/null and redirecting stderr to 
mail if return value is not 0?
As for expected-to-work case, the stdout doesn't has much meaning and 
return value should be good enough to judge the result.





If it's some command not returning value properly, would you please
report it as a bug so we can fix it.


It's not the return value that's problematic (although those used
to be a real mess). It's the fact that a successful run of the command
produces noise on stdout, which most commands don't.


Yes, a lot of tried-and-true tools don't output anything for successful 
run, but also a lot of other tools do output something by default, 
especially for complex tools like LVM.


Maybe we can introduce a global --quite option to silent some output.

Thanks,
Qu


Hugo.
  

Thanks,
Qu


If you change the message a lot of scripts will have to be
changed, at least make it worth it.

  --

With Best Regards,
Marat Khaliili




--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] btrfs-progs: subvolume: outputs message only when operation succeeds

2017-09-25 Thread Hugo Mills
On Mon, Sep 25, 2017 at 03:46:15PM +0800, Qu Wenruo wrote:
> 
> 
> On 2017年09月25日 15:42, Marat Khalili wrote:
> >On 25/09/17 10:30, Nikolay Borisov wrote:
> >>On 19.09.2017 10:41, Misono, Tomohiro wrote:
> >>>"btrfs subvolume create/delete" outputs the message of "Create/Delete
> >>>subvolume ..." even when an operation fails.
> >>>Since it is confusing, let's outputs the message only when an
> >>>operation succeeds.
> >>Please change the verb to past tense, more strongly signaling success -
> >>i.e. "Created subvolume"
> >What about recalling some UNIX standards and returning to NOT
> >outputting any message when operation succeeds? My scripts are
> >full of grep -v calls after each btrfs command, and this sucks
> >(and I don't think I'm alone in this situation).
> 
> Isn't the correct way to catch the return value instead of grepping
> the output?

   It is, but if, for example, you're using the command in a cron
script which is expected to work, you don't want it producing output
because then you get a mail every time the script runs. So you have to
grep -v on the "success" output to make the successful script silent.

> If it's some command not returning value properly, would you please
> report it as a bug so we can fix it.

   It's not the return value that's problematic (although those used
to be a real mess). It's the fact that a successful run of the command
produces noise on stdout, which most commands don't.

   Hugo.
 
> Thanks,
> Qu
> 
> >If you change the message a lot of scripts will have to be
> >changed, at least make it worth it.
> >
> >  --
> >
> >With Best Regards,
> >Marat Khaliili
> >

-- 
Hugo Mills | If you see something, say nothing and drink to
hugo@... carfax.org.uk | forget
http://carfax.org.uk/  |
PGP: E2AB1DE4  | Welcome to Night Vale


signature.asc
Description: Digital signature


Re: [PATCH] btrfs-progs: subvolume: outputs message only when operation succeeds

2017-09-25 Thread Qu Wenruo



On 2017年09月25日 15:42, Marat Khalili wrote:

On 25/09/17 10:30, Nikolay Borisov wrote:

On 19.09.2017 10:41, Misono, Tomohiro wrote:

"btrfs subvolume create/delete" outputs the message of "Create/Delete
subvolume ..." even when an operation fails.
Since it is confusing, let's outputs the message only when an 
operation succeeds.

Please change the verb to past tense, more strongly signaling success -
i.e. "Created subvolume"
What about recalling some UNIX standards and returning to NOT outputting 
any message when operation succeeds? My scripts are full of grep -v 
calls after each btrfs command, and this sucks (and I don't think I'm 
alone in this situation).


Isn't the correct way to catch the return value instead of grepping the 
output?


If it's some command not returning value properly, would you please 
report it as a bug so we can fix it.


Thanks,
Qu

If you change the message a lot of scripts 
will have to be changed, at least make it worth it.


  --

With Best Regards,
Marat Khaliili

--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] btrfs-progs: subvolume: outputs message only when operation succeeds

2017-09-25 Thread Hugo Mills
On Mon, Sep 25, 2017 at 10:42:06AM +0300, Marat Khalili wrote:
> On 25/09/17 10:30, Nikolay Borisov wrote:
> >On 19.09.2017 10:41, Misono, Tomohiro wrote:
> >>"btrfs subvolume create/delete" outputs the message of "Create/Delete
> >>subvolume ..." even when an operation fails.
> >>Since it is confusing, let's outputs the message only when an operation 
> >>succeeds.
> >Please change the verb to past tense, more strongly signaling success -
> >i.e. "Created subvolume"
> What about recalling some UNIX standards and returning to NOT
> outputting any message when operation succeeds? My scripts are full
> of grep -v calls after each btrfs command, and this sucks (and I
> don't think I'm alone in this situation). If you change the message
> a lot of scripts will have to be changed, at least make it worth it.

   Seconded. Make sure the return code reflects the result, and drop
the printed message (or keep it if there's a --verbose flag, maybe).

   Hugo.

-- 
Hugo Mills | If you see something, say nothing and drink to
hugo@... carfax.org.uk | forget
http://carfax.org.uk/  |
PGP: E2AB1DE4  | Welcome to Night Vale


signature.asc
Description: Digital signature


Re: [PATCH] btrfs-progs: subvolume: outputs message only when operation succeeds

2017-09-25 Thread Marat Khalili

On 25/09/17 10:30, Nikolay Borisov wrote:

On 19.09.2017 10:41, Misono, Tomohiro wrote:

"btrfs subvolume create/delete" outputs the message of "Create/Delete
subvolume ..." even when an operation fails.
Since it is confusing, let's outputs the message only when an operation 
succeeds.

Please change the verb to past tense, more strongly signaling success -
i.e. "Created subvolume"
What about recalling some UNIX standards and returning to NOT outputting 
any message when operation succeeds? My scripts are full of grep -v 
calls after each btrfs command, and this sucks (and I don't think I'm 
alone in this situation). If you change the message a lot of scripts 
will have to be changed, at least make it worth it.


 --

With Best Regards,
Marat Khaliili

--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] btrfs-progs: subvolume: outputs message only when operation succeeds

2017-09-25 Thread Nikolay Borisov


On 19.09.2017 10:41, Misono, Tomohiro wrote:
> "btrfs subvolume create/delete" outputs the message of "Create/Delete
> subvolume ..." even when an operation fails.
> Since it is confusing, let's outputs the message only when an operation 
> succeeds.
> 
> Signed-off-by: Tomohiro Misono 
> ---
>  cmds-subvolume.c | 8 
>  1 file changed, 4 insertions(+), 4 deletions(-)
> 
> diff --git a/cmds-subvolume.c b/cmds-subvolume.c
> index 666f6e0..6d4b0fe 100644
> --- a/cmds-subvolume.c
> +++ b/cmds-subvolume.c
> @@ -189,7 +189,6 @@ static int cmd_subvol_create(int argc, char **argv)
>   if (fddst < 0)
>   goto out;
>  
> - printf("Create subvolume '%s/%s'\n", dstdir, newname);
>   if (inherit) {
>   struct btrfs_ioctl_vol_args_v2  args;
>  
> @@ -213,6 +212,7 @@ static int cmd_subvol_create(int argc, char **argv)
>   error("cannot create subvolume: %s", strerror(errno));
>   goto out;
>   }
> + printf("Create subvolume '%s/%s'\n", dstdir, newname);

Please change the verb to past tense, more strongly signaling success -
i.e. "Created subvolume"

>  
>   retval = 0; /* success */
>  out:
> @@ -337,9 +337,6 @@ again:
>   goto out;
>   }
>  
> - printf("Delete subvolume (%s): '%s/%s'\n",
> - commit_mode == 2 || (commit_mode == 1 && cnt + 1 == argc)
> - ? "commit" : "no-commit", dname, vname);
>   memset(, 0, sizeof(args));
>   strncpy_null(args.name, vname);
>   res = ioctl(fd, BTRFS_IOC_SNAP_DESTROY, );
> @@ -349,6 +346,9 @@ again:
>   ret = 1;
>   goto out;
>   }
> + printf("Delete subvolume (%s): '%s/%s'\n",
> + commit_mode == 2 || (commit_mode == 1 && cnt + 1 == argc)
> + ? "commit" : "no-commit", dname, vname);

Ditto: "Deleted subvolume"

>  
>   if (commit_mode == 1) {
>   res = wait_for_commit(fd);
> 
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 4/4] btrfs: remove unused setup_root_args()

2017-09-25 Thread Misono, Tomohiro
Since setup_root_args() is not used anymore, just remove it.

Signed-off-by: Tomohiro Misono 
---
 fs/btrfs/super.c | 35 ---
 1 file changed, 35 deletions(-)

diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index 7edd74d..f589c5b 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -1367,41 +1367,6 @@ static inline int is_subvolume_inode(struct inode *inode)
return 0;
 }
 
-/*
- * This will add subvolid=0 to the argument string while removing any subvol=
- * and subvolid= arguments to make sure we get the top-level root for path
- * walking to the subvol we want.
- */
-static char *setup_root_args(char *args)
-{
-   char *buf, *dst, *sep;
-
-   if (!args)
-   return kstrdup("subvolid=0", GFP_NOFS);
-
-   /* The worst case is that we add ",subvolid=0" to the end. */
-   buf = dst = kmalloc(strlen(args) + strlen(",subvolid=0") + 1, GFP_NOFS);
-   if (!buf)
-   return NULL;
-
-   while (1) {
-   sep = strchrnul(args, ',');
-   if (!strstarts(args, "subvol=") &&
-   !strstarts(args, "subvolid=")) {
-   memcpy(dst, args, sep - args);
-   dst += sep - args;
-   *dst++ = ',';
-   }
-   if (*sep)
-   args = sep + 1;
-   else
-   break;
-   }
-   strcpy(dst, "subvolid=0");
-
-   return buf;
-}
-
 static struct dentry *mount_subvol(const char *subvol_name, u64 
subvol_objectid,
   int flags, const char *device_name,
   char *data, struct vfsmount *mnt)
-- 
2.9.5

--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 3/4] btrfs: split parse_early_options() in two

2017-09-25 Thread Misono, Tomohiro
Now parse_early_options() is used by both btrfs_mount() and mount_root().
However, the former only needs subvol related part and the latter needs
the others.

Therefore extract the subvol related parts from parse_early_options() and
move it to new parse function (parse_subvol_options()).

Signed-off-by: Tomohiro Misono 
---
 fs/btrfs/super.c | 85 +++-
 1 file changed, 60 insertions(+), 25 deletions(-)

diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index 1c34ca6..7edd74d 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -448,7 +448,8 @@ int btrfs_parse_options(struct btrfs_fs_info *info, char 
*options,
case Opt_subvolrootid:
case Opt_device:
/*
-* These are parsed by btrfs_parse_early_options
+* These are parsed by btrfs_parse_subvol_options
+* and btrfs_parse_early_options
 * and can be happily ignored here.
 */
break;
@@ -855,11 +856,63 @@ int btrfs_parse_options(struct btrfs_fs_info *info, char 
*options,
  * only when we need to allocate a new super block.
  */
 static int btrfs_parse_early_options(const char *options, fmode_t flags,
-   void *holder, char **subvol_name, u64 *subvol_objectid,
-   struct btrfs_fs_devices **fs_devices)
+   void *holder, struct btrfs_fs_devices **fs_devices)
 {
substring_t args[MAX_OPT_ARGS];
char *device_name, *opts, *orig, *p;
+   int error = 0;
+
+   if (!options)
+   return 0;
+
+   /*
+* strsep changes the string, duplicate it because btrfs_parse_options
+* gets called later
+*/
+   opts = kstrdup(options, GFP_KERNEL);
+   if (!opts)
+   return -ENOMEM;
+   orig = opts;
+
+   while ((p = strsep(, ",")) != NULL) {
+   int token;
+   if (!*p)
+   continue;
+
+   token = match_token(p, tokens, args);
+   switch (token) {
+   case Opt_device:
+   device_name = match_strdup([0]);
+   if (!device_name) {
+   error = -ENOMEM;
+   goto out;
+   }
+   error = btrfs_scan_one_device(device_name,
+   flags, holder, fs_devices);
+   kfree(device_name);
+   if (error)
+   goto out;
+   break;
+   default:
+   break;
+   }
+   }
+
+out:
+   kfree(orig);
+   return error;
+}
+
+/*
+ * Parse mount options that are related to subvolume id
+ *
+ * The value is later passed to mount_subvol()
+ */
+static int btrfs_parse_subvol_options(const char *options, fmode_t flags,
+   void *holder, char **subvol_name, u64 *subvol_objectid)
+{
+   substring_t args[MAX_OPT_ARGS];
+   char *opts, *orig, *p;
char *num = NULL;
int error = 0;
 
@@ -867,8 +920,8 @@ static int btrfs_parse_early_options(const char *options, 
fmode_t flags,
return 0;
 
/*
-* strsep changes the string, duplicate it because parse_options
-* gets called twice
+* strsep changes the string, duplicate it because
+* btrfs_parse_early_options gets called later
 */
opts = kstrdup(options, GFP_KERNEL);
if (!opts)
@@ -907,18 +960,6 @@ static int btrfs_parse_early_options(const char *options, 
fmode_t flags,
case Opt_subvolrootid:
pr_warn("BTRFS: 'subvolrootid' mount option is 
deprecated and has no effect\n");
break;
-   case Opt_device:
-   device_name = match_strdup([0]);
-   if (!device_name) {
-   error = -ENOMEM;
-   goto out;
-   }
-   error = btrfs_scan_one_device(device_name,
-   flags, holder, fs_devices);
-   kfree(device_name);
-   if (error)
-   goto out;
-   break;
default:
break;
}
@@ -1492,18 +1533,14 @@ static struct dentry *mount_root(struct 
file_system_type *fs_type, int flags,
struct btrfs_fs_info *fs_info = NULL;
struct security_mnt_opts new_sec_opts;
fmode_t mode = FMODE_READ;
-   char *subvol_name = NULL;
-   u64 subvol_objectid = 0;
int error = 0;
 
if (!(flags & MS_RDONLY))
mode |= FMODE_WRITE;
 
error = btrfs_parse_early_options(data, 

[PATCH 2/4] btrfs: cleanup btrfs_mount() using mount_root()

2017-09-25 Thread Misono, Tomohiro
Cleanups btrfs_mount() by using mount_root(). This avoids getting
btrfs_mount() called twice in mount path.

Old btrfs_mount() will do:
0. VFS layer calls vfs_kern_mount() with registered file_system_type
   (for btrfs, btrfs_fs_type). btrfs_mount() is called on the way.
1. btrfs_parse_early_options() parses "subvolid=" mount option and set the
   value to subvol_objectid. Otherwise, subvol_objectid has the initial
value of 0
2. check subvol_objectid is 5 or not. This time id is not 5, and
   btrfs_mount() returns by calling mount_subvol()
3. In mount_subvol(), original mount options are modified to contain
   "subvolid=0" in setup_root_args(). Then, vfs_kern_mount() is called with
   btrfs_fs_type and new options
4. btrfs_mount() is called again
5. btrfs_parse_early_options() parses "subvolid=0" and set 5 (instead of 0)
   to subvol_objectid
6. check subvol_objectid is 5 or not. This time id is 5 and mount_subvol()
   is not called. btrfs_mount() finishes mounting a root
7. (in mount_subvol()) with using a return vale of vfs_kern_mount(), it
   calls mount_subtree()
8 return subvolume's dentry

Reusing the same file_system_type (and btrfs_mount()) for vfs_kern_mount()
is the cause of complication.

Instead, new btrfs_mount() will do:
1. parse subvol id related options for later use in mount_subvol()
2. mount device's root by calling vfs_kern_mount() with
   btrfs_root_fs_type, which is not registered to VFS by
 register_filesystem(). As a result, mount_root() is called
3. return by calling mount_subvol()

The code of 2. is moved from the first part of mount_subvol().

Signed-off-by: Tomohiro Misono 
---
 fs/btrfs/super.c | 186 +--
 1 file changed, 58 insertions(+), 128 deletions(-)

diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index fe43606..1c34ca6 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -1363,48 +1363,11 @@ static char *setup_root_args(char *args)
 
 static struct dentry *mount_subvol(const char *subvol_name, u64 
subvol_objectid,
   int flags, const char *device_name,
-  char *data)
+  char *data, struct vfsmount *mnt)
 {
struct dentry *root;
-   struct vfsmount *mnt = NULL;
-   char *newargs;
int ret;
 
-   newargs = setup_root_args(data);
-   if (!newargs) {
-   root = ERR_PTR(-ENOMEM);
-   goto out;
-   }
-
-   mnt = vfs_kern_mount(_fs_type, flags, device_name, newargs);
-   if (PTR_ERR_OR_ZERO(mnt) == -EBUSY) {
-   if (flags & MS_RDONLY) {
-   mnt = vfs_kern_mount(_fs_type, flags & ~MS_RDONLY,
-device_name, newargs);
-   } else {
-   mnt = vfs_kern_mount(_fs_type, flags | MS_RDONLY,
-device_name, newargs);
-   if (IS_ERR(mnt)) {
-   root = ERR_CAST(mnt);
-   mnt = NULL;
-   goto out;
-   }
-
-   down_write(>mnt_sb->s_umount);
-   ret = btrfs_remount(mnt->mnt_sb, , NULL);
-   up_write(>mnt_sb->s_umount);
-   if (ret < 0) {
-   root = ERR_PTR(ret);
-   goto out;
-   }
-   }
-   }
-   if (IS_ERR(mnt)) {
-   root = ERR_CAST(mnt);
-   mnt = NULL;
-   goto out;
-   }
-
if (!subvol_name) {
if (!subvol_objectid) {
ret = get_default_subvol_objectid(btrfs_sb(mnt->mnt_sb),
@@ -1460,7 +1423,6 @@ static struct dentry *mount_subvol(const char 
*subvol_name, u64 subvol_objectid,
 
 out:
mntput(mnt);
-   kfree(newargs);
kfree(subvol_name);
return root;
 }
@@ -1515,6 +1477,12 @@ static int setup_security_options(struct btrfs_fs_info 
*fs_info,
return ret;
 }
 
+/*
+ * Find a superblock for the given device / mount point.
+ *
+ * Note:  This is based on mount_bdev from fs/super.c with a few additions
+ *   for multiple device setup.  Make sure to keep it in sync.
+ */
 static struct dentry *mount_root(struct file_system_type *fs_type, int flags,
const char *device_name, void *data)
 {
@@ -1621,20 +1589,34 @@ static struct dentry *mount_root(struct 
file_system_type *fs_type, int flags,
security_free_mnt_opts(_sec_opts);
return ERR_PTR(error);
 }
+
 /*
- * Find a superblock for the given device / mount point.
+ * Mount function which is called by VFS layer.
  *
- * Note:  This is based on get_sb_bdev from fs/super.c with a few additions
- *   for multiple device setup.  Make sure to keep it in sync.
+ * In order to allow mounting a 

[PATCH 1/4] btrfs: add mount_root() and new file_system_type

2017-09-25 Thread Misono, Tomohiro
Add mount_root() and new file_system_type for preparation of cleanup of
btrfs_mount(). Code path is not changed yet.

mount_root() is almost the same as current btrfs_mount(), but doesn't
have subvolume related part.

Signed-off-by: Tomohiro Misono 
---
 fs/btrfs/super.c | 116 +++
 1 file changed, 116 insertions(+)

diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index 12540b6..fe43606 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -66,6 +66,7 @@
 
 static const struct super_operations btrfs_super_ops;
 static struct file_system_type btrfs_fs_type;
+static struct file_system_type btrfs_root_fs_type;
 
 static int btrfs_remount(struct super_block *sb, int *flags, char *data);
 
@@ -1514,6 +1515,112 @@ static int setup_security_options(struct btrfs_fs_info 
*fs_info,
return ret;
 }
 
+static struct dentry *mount_root(struct file_system_type *fs_type, int flags,
+   const char *device_name, void *data)
+{
+   struct block_device *bdev = NULL;
+   struct super_block *s;
+   struct btrfs_fs_devices *fs_devices = NULL;
+   struct btrfs_fs_info *fs_info = NULL;
+   struct security_mnt_opts new_sec_opts;
+   fmode_t mode = FMODE_READ;
+   char *subvol_name = NULL;
+   u64 subvol_objectid = 0;
+   int error = 0;
+
+   if (!(flags & MS_RDONLY))
+   mode |= FMODE_WRITE;
+
+   error = btrfs_parse_early_options(data, mode, fs_type,
+ _name, _objectid,
+ _devices);
+   if (error) {
+   kfree(subvol_name);
+   return ERR_PTR(error);
+   }
+
+   security_init_mnt_opts(_sec_opts);
+   if (data) {
+   error = parse_security_options(data, _sec_opts);
+   if (error)
+   return ERR_PTR(error);
+   }
+
+   error = btrfs_scan_one_device(device_name, mode, fs_type, _devices);
+   if (error)
+   goto error_sec_opts;
+
+   /*
+* Setup a dummy root and fs_info for test/set super.  This is because
+* we don't actually fill this stuff out until open_ctree, but we need
+* it for searching for existing supers, so this lets us do that and
+* then open_ctree will properly initialize everything later.
+*/
+   fs_info = kzalloc(sizeof(struct btrfs_fs_info), GFP_NOFS);
+   if (!fs_info) {
+   error = -ENOMEM;
+   goto error_sec_opts;
+   }
+
+   fs_info->fs_devices = fs_devices;
+
+   fs_info->super_copy = kzalloc(BTRFS_SUPER_INFO_SIZE, GFP_NOFS);
+   fs_info->super_for_commit = kzalloc(BTRFS_SUPER_INFO_SIZE, GFP_NOFS);
+   security_init_mnt_opts(_info->security_opts);
+   if (!fs_info->super_copy || !fs_info->super_for_commit) {
+   error = -ENOMEM;
+   goto error_fs_info;
+   }
+
+   error = btrfs_open_devices(fs_devices, mode, fs_type);
+   if (error)
+   goto error_fs_info;
+
+   if (!(flags & MS_RDONLY) && fs_devices->rw_devices == 0) {
+   error = -EACCES;
+   goto error_close_devices;
+   }
+
+   bdev = fs_devices->latest_bdev;
+   s = sget(fs_type, btrfs_test_super, btrfs_set_super, flags | MS_NOSEC,
+fs_info);
+   if (IS_ERR(s)) {
+   error = PTR_ERR(s);
+   goto error_close_devices;
+   }
+
+   if (s->s_root) {
+   btrfs_close_devices(fs_devices);
+   free_fs_info(fs_info);
+   if ((flags ^ s->s_flags) & MS_RDONLY)
+   error = -EBUSY;
+   } else {
+   snprintf(s->s_id, sizeof(s->s_id), "%pg", bdev);
+   btrfs_sb(s)->bdev_holder = fs_type;
+   error = btrfs_fill_super(s, fs_devices, data);
+   }
+   if (error) {
+   deactivate_locked_super(s);
+   goto error_sec_opts;
+   }
+
+   fs_info = btrfs_sb(s);
+   error = setup_security_options(fs_info, s, _sec_opts);
+   if (error) {
+   deactivate_locked_super(s);
+   goto error_sec_opts;
+   }
+
+   return dget(s->s_root);
+
+error_close_devices:
+   btrfs_close_devices(fs_devices);
+error_fs_info:
+   free_fs_info(fs_info);
+error_sec_opts:
+   security_free_mnt_opts(_sec_opts);
+   return ERR_PTR(error);
+}
 /*
  * Find a superblock for the given device / mount point.
  *
@@ -2133,6 +2240,15 @@ static struct file_system_type btrfs_fs_type = {
.kill_sb= btrfs_kill_super,
.fs_flags   = FS_REQUIRES_DEV | FS_BINARY_MOUNTDATA,
 };
+
+static struct file_system_type btrfs_root_fs_type = {
+   .owner  = THIS_MODULE,
+   .name   = "btrfs",
+   .mount  = mount_root,
+   .kill_sb= btrfs_kill_super,
+   .fs_flags   = FS_REQUIRES_DEV | 

[PATCH v3 0/4] btrfs: cleanup mount path

2017-09-25 Thread Misono, Tomohiro
Summary:
Cleanup mount path by avoiding calling btrfs_mount() twice.
No functional change. See below for longer explanation.

Changelog:
v3:
  Reorganized patches again into four and added comments to the source.
  Each patch can be applied and compiled while maintaining functionality.
  The first one is the preparation and the second one is the main part.
  The last two are small cleanups.

v2:
  Split the patch into three parts.

Tomohiro Misono (4):
  btrfs: add mount_root() and new file_system_type
  btrfs: cleanup btrfs_mount() using mount_root()
  btrfs: split parse_early_options() in two
  btrfs: remove unused setup_root_args()

 fs/btrfs/super.c | 252 ---
 1 file changed, 149 insertions(+), 103 deletions(-)


Background Explanation:
btrfs uses mount_subtree() to mount a subvolume directly.  This function
needs a vfsmount* of device's root (/), which is a return value of
vfs_kern_mount() (therefore root has to be mounted internally anyway).

Current approach of getting root's vfsmount* in mount time is a bit tricky:
0. VFS layer calls vfs_kern_mount() with registered file_system_type
   (for btrfs, btrfs_fs_type). btrfs_mount() is called on the way.
1. btrfs_parse_early_options() parses "subvolid=" mount option and set the
   value to subvol_objectid. Otherwise, subvol_objectid has the initial
   value of 0
2. check subvol_objectid is 5 or not. This time id is not 5, and
   btrfs_mount() returns by calling mount_subvol()
3. In mount_subvol(), original mount options are modified to contain
   "subvolid=0" in setup_root_args(). Then, vfs_kern_mount() is called with
   btrfs_fs_type and new options
4. btrfs_mount() is called again
5. btrfs_parse_early_options() parses "subvolid=0" and set 5 (instead of 0)
   to subvol_objectid
6. check subvol_objectid is 5 or not. This time id is 5 and mount_subvol()
   is not called. btrfs_mount() finishes mounting a root
7. (in mount_subvol()) with using a return vale of vfs_kern_mount(), it
   calls mount_subtree()
8 return subvolume's dentry

As illustrated above, calling btrfs_mount() twice complicates the problem.
We can use another file_system_type (which has different callback
function to mount root) for arguments of our vfs_kern_mount() call to 
avoid this.

In this approach: 
1. parse subvol id related options for later use in mount_subvol()
2. mount device's root by calling vfs_kern_mount() with
   different file_system_type
3. return by calling mount_subvol()

I think this approach is the same as nfsv4, which is the only other
filesystem using mount_subtree() currently, and easy to understand.

Most of the change is done by just reorganizing the original code of
btrfs_mount()/mount_subvol() into btrfs_mount()/mount_subvol()/mount_root()

btrfs_parse_early_options() is split into two parts to avoid "device="
option will be handled twice (though it cause no harm). setup_root_args()
is deleted as not needed anymore.

-- 
2.9.5

--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


AW: AW: Btrfs performance with small blocksize on SSD

2017-09-25 Thread Fuhrmann, Carsten
Well the correct translation for "Laufzeit" is runtime and not latency. But 
thank you for that hint, I'll change it to "gesamt Laufzeit" to make it more 
clear.


Best regards

Carsten

-Ursprüngliche Nachricht-
Von: Andrei Borzenkov [mailto:arvidj...@gmail.com] 
Gesendet: Sonntag, 24. September 2017 18:43
An: Fuhrmann, Carsten ; Qu Wenruo 
; linux-btrfs@vger.kernel.org
Betreff: Re: AW: Btrfs performance with small blocksize on SSD

24.09.2017 16:53, Fuhrmann, Carsten пишет:
> Hello,
> 
> 1)
> I used direct write (no page cache) but I didn't disable the Disk cache of 
> the HDD/SSD itself. In all tests I wrote 1GB and looked for the runtime of 
> that write process.

So "latency" on your diagram means total time to write 1GiB file? That is 
highly unusual meaning for "latency" which normally means time to perform 
single IO. If so, you should better rename Y-axis to something like "total run 
time".
N�r��yb�X��ǧv�^�)޺{.n�+{�n�߲)w*jg����ݢj/���z�ޖ��2�ޙ&�)ߡ�a�����G���h��j:+v���w��٥