[PATCH v2 5/5] btrfs-progs: subvol: fix subvol del --commit-after

2017-09-26 Thread Misono, Tomohiro
Fix subvol del --commit-after to work properly:
 - SYNC ioctl will be issued even when last delete is failed
 - SYNC ioctl will be issued on each file system only once in the end

To achieve this, get_fsid() and add_seen_fsid() is called after each delete
to keep only one fd for each fs.

In the end, seen_fsid_hash will be traversed and SYNC is issued on each fs.

Signed-off-by: Tomohiro Misono 
Reviewed-by: Qu Wenruo 
---
 cmds-subvolume.c | 61 
 1 file changed, 48 insertions(+), 13 deletions(-)

diff --git a/cmds-subvolume.c b/cmds-subvolume.c
index 9e561f3..0c8a75f 100644
--- a/cmds-subvolume.c
+++ b/cmds-subvolume.c
@@ -263,6 +263,9 @@ static int cmd_subvol_delete(int argc, char **argv)
DIR *dirstream = NULL;
int verbose = 0;
int commit_mode = 0;
+   u8 fsid[BTRFS_FSID_SIZE];
+   char uuidbuf[BTRFS_UUID_UNPARSED_SIZE];
+   struct seen_fsid *seen_fsid_hash[SEEN_FSID_HASH_SIZE] = {NULL,};
enum { COMMIT_AFTER = 1, COMMIT_EACH = 2 };
 
while (1) {
@@ -358,31 +361,63 @@ again:
path, strerror(errno));
ret = 1;
}
+   } else if (commit_mode == COMMIT_AFTER) {
+   res = get_fsid(dname, fsid, 0);
+   if (res < 0) {
+   error("unable to get fsid for '%s': %s", path, 
strerror(-res));
+   error("delete suceeded but commit may not be done in 
the end");
+   ret = 1;
+   goto out;
+   }
+
+   if (add_seen_fsid(fsid, seen_fsid_hash, fd, dirstream) == 0) {
+   if (verbose > 0) {
+   uuid_unparse(fsid, uuidbuf);
+   printf("  new fs is found for '%s', fsid: 
%s\n", path, uuidbuf);
+   }
+   // this is the first time a subvolume on this 
filesystem is deleted
+   // keep fd in order to issue SYNC ioctl in the end
+   goto keep_fd;
+   }
}
 
 out:
+   close_file_or_dir(fd, dirstream);
+keep_fd:
+   fd = -1;
+   dirstream = NULL;
free(dupdname);
free(dupvname);
dupdname = NULL;
dupvname = NULL;
cnt++;
-   if (cnt < argc) {
-   close_file_or_dir(fd, dirstream);
-   /* avoid double free */
-   fd = -1;
-   dirstream = NULL;
+   if (cnt < argc)
goto again;
-   }
 
-   if (commit_mode == COMMIT_AFTER && fd != -1) {
-   res = wait_for_commit(fd);
-   if (res < 0) {
-   error("unable to do final sync after deletion: %s",
-   strerror(errno));
-   ret = 1;
+   if (commit_mode == COMMIT_AFTER) {
+   // traverse seen_fsid_hash and issue SYNC ioctl on each 
filesystem
+   int slot;
+   struct seen_fsid *seen;
+
+   for (slot = 0; slot < SEEN_FSID_HASH_SIZE; slot++) {
+   seen = seen_fsid_hash[slot];
+   while (seen) {
+   res = wait_for_commit(seen->fd);
+   if (res < 0) {
+   uuid_unparse(seen->fsid, uuidbuf);
+   error("unable to do final sync after 
deletion: %s, fsid: %s",
+   strerror(errno), uuidbuf);
+   ret = 1;
+   } else if (verbose > 0) {
+   uuid_unparse(seen->fsid, uuidbuf);
+   printf("final sync is done for fsid: 
%s\n", uuidbuf);
+   }
+   seen = seen->next;
+   }
}
+   // fd will also be closed in free_seen_fsid
+   free_seen_fsid(seen_fsid_hash);
}
-   close_file_or_dir(fd, dirstream);
 
return ret;
 }
-- 
2.9.5

--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v2 4/5] btrfs-progs: change seen_fsid to hold fd and DIR*

2017-09-26 Thread Misono, Tomohiro
Change seen_fsid to hold fd and DIR* in order to keep access to each fs.
This will be used for 'subvol delete --commit-after'.

Signed-off-by: Tomohiro Misono 
Reviewed-by: Qu Wenruo 
---
 cmds-filesystem.c | 4 ++--
 utils.c   | 6 +-
 utils.h   | 5 -
 3 files changed, 11 insertions(+), 4 deletions(-)

diff --git a/cmds-filesystem.c b/cmds-filesystem.c
index c7dae40..4bbff43 100644
--- a/cmds-filesystem.c
+++ b/cmds-filesystem.c
@@ -277,7 +277,7 @@ static void print_one_uuid(struct btrfs_fs_devices 
*fs_devices,
u64 devs_found = 0;
u64 total;
 
-   if (add_seen_fsid(fs_devices->fsid, seen_fsid_hash))
+   if (add_seen_fsid(fs_devices->fsid, seen_fsid_hash, -1, NULL))
return;
 
uuid_unparse(fs_devices->fsid, uuidbuf);
@@ -324,7 +324,7 @@ static int print_one_fs(struct btrfs_ioctl_fs_info_args 
*fs_info,
struct btrfs_ioctl_dev_info_args *tmp_dev_info;
int ret;
 
-   ret = add_seen_fsid(fs_info->fsid, seen_fsid_hash);
+   ret = add_seen_fsid(fs_info->fsid, seen_fsid_hash, -1, NULL);
if (ret == -EEXIST)
return 0;
else if (ret)
diff --git a/utils.c b/utils.c
index f91d41e..bdfbfe0 100644
--- a/utils.c
+++ b/utils.c
@@ -1804,7 +1804,8 @@ int is_seen_fsid(u8 *fsid, struct seen_fsid 
*seen_fsid_hash[])
return 0;
 }
 
-int add_seen_fsid(u8 *fsid, struct seen_fsid *seen_fsid_hash[])
+int add_seen_fsid(u8 *fsid, struct seen_fsid *seen_fsid_hash[],
+   int fd, DIR *dirstream)
 {
u8 hash = fsid[0];
int slot = hash % SEEN_FSID_HASH_SIZE;
@@ -1832,6 +1833,8 @@ insert:
 
alloc->next = NULL;
memcpy(alloc->fsid, fsid, BTRFS_FSID_SIZE);
+   alloc->fd = fd;
+   alloc->dirstream = dirstream;
 
if (seen)
seen->next = alloc;
@@ -1851,6 +1854,7 @@ void free_seen_fsid(struct seen_fsid *seen_fsid_hash[])
seen = seen_fsid_hash[slot];
while (seen) {
next = seen->next;
+   close_file_or_dir(seen->fd, seen->dirstream);
free(seen);
seen = next;
}
diff --git a/utils.h b/utils.h
index da34e6c..bac7688 100644
--- a/utils.h
+++ b/utils.h
@@ -107,9 +107,12 @@ int get_fsid(const char *path, u8 *fsid, int silent);
 struct seen_fsid {
u8 fsid[BTRFS_FSID_SIZE];
struct seen_fsid *next;
+   DIR *dirstream;
+   int fd;
 };
 int is_seen_fsid(u8 *fsid, struct seen_fsid *seen_fsid_hash[]);
-int add_seen_fsid(u8 *fsid, struct seen_fsid *seen_fsid_hash[]);
+int add_seen_fsid(u8 *fsid, struct seen_fsid *seen_fsid_hash[],
+   int fd, DIR *dirstream);
 void free_seen_fsid(struct seen_fsid *seen_fsid_hash[]);
 
 int get_label(const char *btrfs_dev, char *label);
-- 
2.9.5

--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v2 3/5] btrfs-progs: move seen_fsid to util.c

2017-09-26 Thread Misono, Tomohiro
Make is_seen_fsid()/add_seen_fsid()/free_seen_fsid() to common functions.
This will be used for 'subvol delete --commit-after'.

Signed-off-by: Tomohiro Misono 
Reviewed-by: Qu Wenruo 
---
 cmds-filesystem.c | 88 ---
 utils.c   | 70 +++
 utils.h   | 11 +++
 3 files changed, 86 insertions(+), 83 deletions(-)

diff --git a/cmds-filesystem.c b/cmds-filesystem.c
index 018857c..c7dae40 100644
--- a/cmds-filesystem.c
+++ b/cmds-filesystem.c
@@ -30,7 +30,6 @@
 
 #include "kerncompat.h"
 #include "ctree.h"
-#include "ioctl.h"
 #include "utils.h"
 #include "volumes.h"
 #include "commands.h"
@@ -43,85 +42,8 @@
  * for btrfs fi show, we maintain a hash of fsids we've already printed.
  * This way we don't print dups if a given FS is mounted more than once.
  */
-#define SEEN_FSID_HASH_SIZE 256
-
-struct seen_fsid {
-   u8 fsid[BTRFS_FSID_SIZE];
-   struct seen_fsid *next;
-};
-
 static struct seen_fsid *seen_fsid_hash[SEEN_FSID_HASH_SIZE] = {NULL,};
 
-static int is_seen_fsid(u8 *fsid)
-{
-   u8 hash = fsid[0];
-   int slot = hash % SEEN_FSID_HASH_SIZE;
-   struct seen_fsid *seen = seen_fsid_hash[slot];
-
-   while (seen) {
-   if (memcmp(seen->fsid, fsid, BTRFS_FSID_SIZE) == 0)
-   return 1;
-
-   seen = seen->next;
-   }
-
-   return 0;
-}
-
-static int add_seen_fsid(u8 *fsid)
-{
-   u8 hash = fsid[0];
-   int slot = hash % SEEN_FSID_HASH_SIZE;
-   struct seen_fsid *seen = seen_fsid_hash[slot];
-   struct seen_fsid *alloc;
-
-   if (!seen)
-   goto insert;
-
-   while (1) {
-   if (memcmp(seen->fsid, fsid, BTRFS_FSID_SIZE) == 0)
-   return -EEXIST;
-
-   if (!seen->next)
-   break;
-
-   seen = seen->next;
-   }
-
-insert:
-
-   alloc = malloc(sizeof(*alloc));
-   if (!alloc)
-   return -ENOMEM;
-
-   alloc->next = NULL;
-   memcpy(alloc->fsid, fsid, BTRFS_FSID_SIZE);
-
-   if (seen)
-   seen->next = alloc;
-   else
-   seen_fsid_hash[slot] = alloc;
-
-   return 0;
-}
-
-static void free_seen_fsid(void)
-{
-   int slot;
-   struct seen_fsid *seen;
-   struct seen_fsid *next;
-
-   for (slot = 0; slot < SEEN_FSID_HASH_SIZE; slot++) {
-   seen = seen_fsid_hash[slot];
-   while (seen) {
-   next = seen->next;
-   free(seen);
-   seen = next;
-   }
-   seen_fsid_hash[slot] = NULL;
-   }
-}
-
 static const char * const filesystem_cmd_group_usage[] = {
"btrfs filesystem []  []",
NULL
@@ -355,7 +277,7 @@ static void print_one_uuid(struct btrfs_fs_devices 
*fs_devices,
u64 devs_found = 0;
u64 total;
 
-   if (add_seen_fsid(fs_devices->fsid))
+   if (add_seen_fsid(fs_devices->fsid, seen_fsid_hash))
return;
 
uuid_unparse(fs_devices->fsid, uuidbuf);
@@ -402,7 +324,7 @@ static int print_one_fs(struct btrfs_ioctl_fs_info_args 
*fs_info,
struct btrfs_ioctl_dev_info_args *tmp_dev_info;
int ret;
 
-   ret = add_seen_fsid(fs_info->fsid);
+   ret = add_seen_fsid(fs_info->fsid, seen_fsid_hash);
if (ret == -EEXIST)
return 0;
else if (ret)
@@ -474,7 +396,7 @@ static int btrfs_scan_kernel(void *search, unsigned 
unit_mode)
goto out;
 
/* skip all fs already shown as mounted fs */
-   if (is_seen_fsid(fs_info_arg.fsid))
+   if (is_seen_fsid(fs_info_arg.fsid, seen_fsid_hash))
continue;
 
ret = get_label_mounted(mnt->mnt_dir, label);
@@ -676,7 +598,7 @@ static int search_umounted_fs_uuids(struct list_head 
*all_uuids,
}
 
/* skip all fs already shown as mounted fs */
-   if (is_seen_fsid(cur_fs->fsid))
+   if (is_seen_fsid(cur_fs->fsid, seen_fsid_hash))
continue;
 
fs_copy = calloc(1, sizeof(*fs_copy));
@@ -908,7 +830,7 @@ devs_only:
free_fs_devices(fs_devices);
}
 out:
-   free_seen_fsid();
+   free_seen_fsid(seen_fsid_hash);
return ret;
 }
 
diff --git a/utils.c b/utils.c
index 4a5dc60..f91d41e 100644
--- a/utils.c
+++ b/utils.c
@@ -1788,6 +1788,76 @@ out:
return ret;
 }
 
+int is_seen_fsid(u8 *fsid, struct seen_fsid *seen_fsid_hash[])
+{
+   u8 hash = fsid[0];
+   int slot = hash % SEEN_FSID_HASH_SIZE;
+   struct seen_fsid *seen = seen_fsid_hash[slot];
+
+   while (seen) {
+   if (memcmp(seen->fsid, fsid, BTRFS_FSID_SIZE) == 0)
+   return 1;
+
+   seen = seen->next;
+   

[PATCH v2 2/5] btrfs-progs: move get_fsid() to util.c

2017-09-26 Thread Misono, Tomohiro
Make get_fsid() to a common function.
This will be used for 'subvol delete --commit-after'.

Signed-off-by: Tomohiro Misono 
Reviewed-by: Qu Wenruo 
---
 cmds-property.c | 30 --
 utils.c | 31 +++
 utils.h |  1 +
 3 files changed, 32 insertions(+), 30 deletions(-)

diff --git a/cmds-property.c b/cmds-property.c
index 9ae1246..03bafa0 100644
--- a/cmds-property.c
+++ b/cmds-property.c
@@ -48,36 +48,6 @@ static int parse_prop(const char *arg, const struct 
prop_handler *props,
return -1;
 }
 
-static int get_fsid(const char *path, u8 *fsid, int silent)
-{
-   int ret;
-   int fd;
-   struct btrfs_ioctl_fs_info_args args;
-
-   fd = open(path, O_RDONLY);
-   if (fd < 0) {
-   ret = -errno;
-   if (!silent)
-   error("failed to open %s: %s", path,
-   strerror(-ret));
-   goto out;
-   }
-
-   ret = ioctl(fd, BTRFS_IOC_FS_INFO, );
-   if (ret < 0) {
-   ret = -errno;
-   goto out;
-   }
-
-   memcpy(fsid, args.fsid, BTRFS_FSID_SIZE);
-   ret = 0;
-
-out:
-   if (fd != -1)
-   close(fd);
-   return ret;
-}
-
 static int check_btrfs_object(const char *object)
 {
int ret;
diff --git a/utils.c b/utils.c
index 7a2710f..4a5dc60 100644
--- a/utils.c
+++ b/utils.c
@@ -1758,6 +1758,37 @@ out:
return ret;
 }
 
+int get_fsid(const char *path, u8 *fsid, int silent)
+{
+   int ret;
+   int fd;
+   struct btrfs_ioctl_fs_info_args args;
+
+   fd = open(path, O_RDONLY);
+   if (fd < 0) {
+   ret = -errno;
+   if (!silent)
+   error("failed to open %s: %s", path,
+   strerror(-ret));
+   goto out;
+   }
+
+   ret = ioctl(fd, BTRFS_IOC_FS_INFO, );
+   if (ret < 0) {
+   ret = -errno;
+   goto out;
+   }
+
+   memcpy(fsid, args.fsid, BTRFS_FSID_SIZE);
+   ret = 0;
+
+out:
+   if (fd != -1)
+   close(fd);
+   return ret;
+}
+
+
 static int group_profile_devs_min(u64 flag)
 {
switch (flag & BTRFS_BLOCK_GROUP_PROFILE_MASK) {
diff --git a/utils.h b/utils.h
index d28a05a..b3aabe1 100644
--- a/utils.h
+++ b/utils.h
@@ -100,6 +100,7 @@ int open_file_or_dir3(const char *fname, DIR **dirstream, 
int open_flags);
 void close_file_or_dir(int fd, DIR *dirstream);
 int get_fs_info(const char *path, struct btrfs_ioctl_fs_info_args *fi_args,
struct btrfs_ioctl_dev_info_args **di_ret);
+int get_fsid(const char *path, u8 *fsid, int silent);
 int get_label(const char *btrfs_dev, char *label);
 int set_label(const char *btrfs_dev, const char *label);
 
-- 
2.9.5

--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v2 1/5] btrfs-progs: subvol: exchange subvol del --commit-after and --commit-each

2017-09-26 Thread Misono, Tomohiro
Current code is reversed in --commit-after and --commit-each operation.
i.e. --commit-after means --commit-each actually. This patch fix this
and also introduces enum type for more readable code.

Signed-off-by: Tomohiro Misono 
Reviewed-by: Qu Wenruo 
---
 cmds-subvolume.c | 13 +++--
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/cmds-subvolume.c b/cmds-subvolume.c
index 666f6e0..9e561f3 100644
--- a/cmds-subvolume.c
+++ b/cmds-subvolume.c
@@ -263,6 +263,7 @@ static int cmd_subvol_delete(int argc, char **argv)
DIR *dirstream = NULL;
int verbose = 0;
int commit_mode = 0;
+   enum { COMMIT_AFTER = 1, COMMIT_EACH = 2 };
 
while (1) {
int c;
@@ -279,10 +280,10 @@ static int cmd_subvol_delete(int argc, char **argv)
 
switch(c) {
case 'c':
-   commit_mode = 1;
+   commit_mode = COMMIT_AFTER;
break;
case 'C':
-   commit_mode = 2;
+   commit_mode = COMMIT_EACH;
break;
case 'v':
verbose++;
@@ -298,7 +299,7 @@ static int cmd_subvol_delete(int argc, char **argv)
if (verbose > 0) {
printf("Transaction commit: %s\n",
!commit_mode ? "none (default)" :
-   commit_mode == 1 ? "at the end" : "after each");
+   commit_mode == COMMIT_AFTER ? "at the end" : "after 
each");
}
 
cnt = optind;
@@ -338,7 +339,7 @@ again:
}
 
printf("Delete subvolume (%s): '%s/%s'\n",
-   commit_mode == 2 || (commit_mode == 1 && cnt + 1 == argc)
+   commit_mode == COMMIT_EACH || (commit_mode == COMMIT_AFTER && 
cnt + 1 == argc)
? "commit" : "no-commit", dname, vname);
memset(, 0, sizeof(args));
strncpy_null(args.name, vname);
@@ -350,7 +351,7 @@ again:
goto out;
}
 
-   if (commit_mode == 1) {
+   if (commit_mode == COMMIT_EACH) {
res = wait_for_commit(fd);
if (res < 0) {
error("unable to wait for commit after '%s': %s",
@@ -373,7 +374,7 @@ out:
goto again;
}
 
-   if (commit_mode == 2 && fd != -1) {
+   if (commit_mode == COMMIT_AFTER && fd != -1) {
res = wait_for_commit(fd);
if (res < 0) {
error("unable to do final sync after deletion: %s",
-- 
2.9.5

--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v2 0/5] btrfs-progs: subvol: fix del --commit-after

2017-09-26 Thread Misono, Tomohiro
Fix subvol del --commit-after to work properly:
 - SYNC ioctl will be issued even when last delete is failed
 - SYNC ioctl will be issued on each file system only once in the end

To achieve this, each deleted subvol's (parent's) fsid is checked each
time. If the fsid is seen for the first time, its fd will be kept in order
to issue SYNC ioctl in the end.

There already exists get_fsid() in cmds-property.c and seen_fsid which
keeps fsid in hush function in cmds-filesystem.c. This patch utilizes
them.

First patch is the independent but critical. Current code is reversed in
--commit-after and --commit-each operation. i.e. --commit-after means
--commit-each actually. The patch fix this.

2rd to 4th patches make functions stated above to common and last one is
the main part.

Thanks Qu for reviewing whole patches.

change to v2:
split the cleanup part of 4th patch and make it the independent patch
(1st patch in new series).

Tomohiro Misono (5):
  btrfs-progs: subvol: exchange subvol del --commit-after and --commit-each
  btrfs-progs: move get_fsid() to util.c
  btrfs-progs: move seen_fsid to util.c
  btrfs-progs: change seen_fsid to hold fd and DIR*
  btrfs-progs: subvol: fix subvol del --commit-after

 cmds-filesystem.c |  88 +++--
 cmds-property.c   |  30 
 cmds-subvolume.c  |  72 +++--
 utils.c   | 105 ++
 utils.h   |  15 
 5 files changed, 179 insertions(+), 131 deletions(-)

-- 
2.9.5

--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 3/4] btrfs-progs: change seen_fsid to hold fd and DIR*

2017-09-26 Thread Qu Wenruo



On 2017年09月27日 08:42, Misono, Tomohiro wrote:

On 2017/09/26 22:08, Qu Wenruo wrote:



On 2017年09月26日 13:45, Misono, Tomohiro wrote:

Change seen_fsid to hold fd and DIR* in order to keep access to each fs.
This will be used for 'subvol delete --commit-after'.


It is already quite good, good enough for the fix.

However just a small point can be further enhanced, commended below.



Signed-off-by: Tomohiro Misono 
---
   cmds-filesystem.c | 4 ++--
   utils.c   | 6 +-
   utils.h   | 5 -
   3 files changed, 11 insertions(+), 4 deletions(-)

diff --git a/cmds-filesystem.c b/cmds-filesystem.c
index c7dae40..4bbff43 100644
--- a/cmds-filesystem.c
+++ b/cmds-filesystem.c
@@ -277,7 +277,7 @@ static void print_one_uuid(struct btrfs_fs_devices 
*fs_devices,
u64 devs_found = 0;
u64 total;
   
-	if (add_seen_fsid(fs_devices->fsid, seen_fsid_hash))

+   if (add_seen_fsid(fs_devices->fsid, seen_fsid_hash, -1, NULL))
return;
   
   	uuid_unparse(fs_devices->fsid, uuidbuf);

@@ -324,7 +324,7 @@ static int print_one_fs(struct btrfs_ioctl_fs_info_args 
*fs_info,
struct btrfs_ioctl_dev_info_args *tmp_dev_info;
int ret;
   
-	ret = add_seen_fsid(fs_info->fsid, seen_fsid_hash);

+   ret = add_seen_fsid(fs_info->fsid, seen_fsid_hash, -1, NULL);
if (ret == -EEXIST)
return 0;
else if (ret)
diff --git a/utils.c b/utils.c
index f91d41e..bdfbfe0 100644
--- a/utils.c
+++ b/utils.c
@@ -1804,7 +1804,8 @@ int is_seen_fsid(u8 *fsid, struct seen_fsid 
*seen_fsid_hash[])
return 0;
   }
   
-int add_seen_fsid(u8 *fsid, struct seen_fsid *seen_fsid_hash[])

+int add_seen_fsid(u8 *fsid, struct seen_fsid *seen_fsid_hash[],
+   int fd, DIR *dirstream)
   {
u8 hash = fsid[0];
int slot = hash % SEEN_FSID_HASH_SIZE;
@@ -1832,6 +1833,8 @@ insert:
   
   	alloc->next = NULL;

memcpy(alloc->fsid, fsid, BTRFS_FSID_SIZE);
+   alloc->fd = fd;
+   alloc->dirstream = dirstream;
   
   	if (seen)

seen->next = alloc;
@@ -1851,6 +1854,7 @@ void free_seen_fsid(struct seen_fsid *seen_fsid_hash[])
seen = seen_fsid_hash[slot];
while (seen) {
next = seen->next;
+   close_file_or_dir(seen->fd, seen->dirstream);
free(seen);
seen = next;
}
diff --git a/utils.h b/utils.h
index da34e6c..bac7688 100644
--- a/utils.h
+++ b/utils.h
@@ -107,9 +107,12 @@ int get_fsid(const char *path, u8 *fsid, int silent);
   struct seen_fsid {
u8 fsid[BTRFS_FSID_SIZE];
struct seen_fsid *next;
+   int fd;


Will it be possible that the final fd recorded here is invalid or some
other reason that we failed to execute SYNC ioctl on that fd, but can
succeeded with other fd?

In that case, a list of fd will help.

Thanks,
Qu


Hello,

I think fd will not be invalidated unless user does because open is
succeeded. Also, if SYNC is failed for one fd, it would fail for other fds
too. So, I think there is no need to keep several fds. What do you think?


Makes sense.

So I'm OK using current method.
Feel free to add my reviewed-by tag.

Reviewed-by: Qu Wenruo 

Thanks,
Qu



By the way, thanks for reviewing whole patches and comments.
I will splits the cleanup for the fourth patch.

Regards,
Tomohiro


+   DIR *dirstream;
   };
   int is_seen_fsid(u8 *fsid, struct seen_fsid *seen_fsid_hash[]);
-int add_seen_fsid(u8 *fsid, struct seen_fsid *seen_fsid_hash[]);
+int add_seen_fsid(u8 *fsid, struct seen_fsid *seen_fsid_hash[],
+   int fd, DIR *dirstream);
   void free_seen_fsid(struct seen_fsid *seen_fsid_hash[]);
   
   int get_label(const char *btrfs_dev, char *label);







--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 3/4] btrfs-progs: change seen_fsid to hold fd and DIR*

2017-09-26 Thread Misono, Tomohiro
On 2017/09/26 22:08, Qu Wenruo wrote:
> 
> 
> On 2017年09月26日 13:45, Misono, Tomohiro wrote:
>> Change seen_fsid to hold fd and DIR* in order to keep access to each fs.
>> This will be used for 'subvol delete --commit-after'.
> 
> It is already quite good, good enough for the fix.
> 
> However just a small point can be further enhanced, commended below.
> 
>>
>> Signed-off-by: Tomohiro Misono 
>> ---
>>   cmds-filesystem.c | 4 ++--
>>   utils.c   | 6 +-
>>   utils.h   | 5 -
>>   3 files changed, 11 insertions(+), 4 deletions(-)
>>
>> diff --git a/cmds-filesystem.c b/cmds-filesystem.c
>> index c7dae40..4bbff43 100644
>> --- a/cmds-filesystem.c
>> +++ b/cmds-filesystem.c
>> @@ -277,7 +277,7 @@ static void print_one_uuid(struct btrfs_fs_devices 
>> *fs_devices,
>>  u64 devs_found = 0;
>>  u64 total;
>>   
>> -if (add_seen_fsid(fs_devices->fsid, seen_fsid_hash))
>> +if (add_seen_fsid(fs_devices->fsid, seen_fsid_hash, -1, NULL))
>>  return;
>>   
>>  uuid_unparse(fs_devices->fsid, uuidbuf);
>> @@ -324,7 +324,7 @@ static int print_one_fs(struct btrfs_ioctl_fs_info_args 
>> *fs_info,
>>  struct btrfs_ioctl_dev_info_args *tmp_dev_info;
>>  int ret;
>>   
>> -ret = add_seen_fsid(fs_info->fsid, seen_fsid_hash);
>> +ret = add_seen_fsid(fs_info->fsid, seen_fsid_hash, -1, NULL);
>>  if (ret == -EEXIST)
>>  return 0;
>>  else if (ret)
>> diff --git a/utils.c b/utils.c
>> index f91d41e..bdfbfe0 100644
>> --- a/utils.c
>> +++ b/utils.c
>> @@ -1804,7 +1804,8 @@ int is_seen_fsid(u8 *fsid, struct seen_fsid 
>> *seen_fsid_hash[])
>>  return 0;
>>   }
>>   
>> -int add_seen_fsid(u8 *fsid, struct seen_fsid *seen_fsid_hash[])
>> +int add_seen_fsid(u8 *fsid, struct seen_fsid *seen_fsid_hash[],
>> +int fd, DIR *dirstream)
>>   {
>>  u8 hash = fsid[0];
>>  int slot = hash % SEEN_FSID_HASH_SIZE;
>> @@ -1832,6 +1833,8 @@ insert:
>>   
>>  alloc->next = NULL;
>>  memcpy(alloc->fsid, fsid, BTRFS_FSID_SIZE);
>> +alloc->fd = fd;
>> +alloc->dirstream = dirstream;
>>   
>>  if (seen)
>>  seen->next = alloc;
>> @@ -1851,6 +1854,7 @@ void free_seen_fsid(struct seen_fsid *seen_fsid_hash[])
>>  seen = seen_fsid_hash[slot];
>>  while (seen) {
>>  next = seen->next;
>> +close_file_or_dir(seen->fd, seen->dirstream);
>>  free(seen);
>>  seen = next;
>>  }
>> diff --git a/utils.h b/utils.h
>> index da34e6c..bac7688 100644
>> --- a/utils.h
>> +++ b/utils.h
>> @@ -107,9 +107,12 @@ int get_fsid(const char *path, u8 *fsid, int silent);
>>   struct seen_fsid {
>>  u8 fsid[BTRFS_FSID_SIZE];
>>  struct seen_fsid *next;
>> +int fd;
> 
> Will it be possible that the final fd recorded here is invalid or some 
> other reason that we failed to execute SYNC ioctl on that fd, but can 
> succeeded with other fd?
> 
> In that case, a list of fd will help.
> 
> Thanks,
> Qu
> 
Hello,

I think fd will not be invalidated unless user does because open is
succeeded. Also, if SYNC is failed for one fd, it would fail for other fds
too. So, I think there is no need to keep several fds. What do you think?

By the way, thanks for reviewing whole patches and comments.
I will splits the cleanup for the fourth patch.

Regards,
Tomohiro

>> +DIR *dirstream;
>>   };
>>   int is_seen_fsid(u8 *fsid, struct seen_fsid *seen_fsid_hash[]);
>> -int add_seen_fsid(u8 *fsid, struct seen_fsid *seen_fsid_hash[]);
>> +int add_seen_fsid(u8 *fsid, struct seen_fsid *seen_fsid_hash[],
>> +int fd, DIR *dirstream);
>>   void free_seen_fsid(struct seen_fsid *seen_fsid_hash[]);
>>   
>>   int get_label(const char *btrfs_dev, char *label);
>>
> 
> 

--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v2] fstests: btrfs/150 regression test for reading compressed data

2017-09-26 Thread Liu Bo
On Tue, Sep 26, 2017 at 04:37:52PM -0700, Liu Bo wrote:
> On Tue, Sep 26, 2017 at 05:02:36PM +0800, Eryu Guan wrote:
> > On Fri, Sep 22, 2017 at 05:21:27PM -0600, Liu Bo wrote:
> > > We had a bug in btrfs compression code which could end up with a
> > > kernel panic.
> > > 
> > > This is adding a regression test for the bug and I've also sent a
> > > kernel patch to fix the bug.
> > > 
> > > The patch is "Btrfs: fix kernel oops while reading compressed data".
> > > 
> > > Signed-off-by: Liu Bo 
> > 
> > Hmm, I can't reproduce the panic with 4.13 kernel, which doesn't have
> > the fix applied. Can you please help confirm if it panics on your test
> > environment?
> >
> 
> Yes, it is reproducible on my box, hrm...I'll be running it more times
> to double check.
> 

It worked for me...both v4.13 and v4.14.0-rc2 have the following
messages[1].

This requires two config:
CONFIG_FAULT_INJECTION=y
CONFIG_FAULT_INJECTION_DEBUG_FS=y

Could you please check again?

[1]:
[  135.982643] run fstests btrfs/150 at 2017-09-26 16:11:27
[  136.839434] BTRFS: device fsid 9152fe7e-3006-47d5-a9b7-330af2809da7
devid 1 transid 5 /dev/sde
[  136.842082] BTRFS: device fsid 9152fe7e-3006-47d5-a9b7-330af2809da7
devid 2 transid 5 /dev/sdc
[  136.879626] BTRFS info (device sdc): use zlib compression
[  136.880263] BTRFS info (device sdc): disk space caching is enabled
[  136.880845] BTRFS info (device sdc): has skinny extents
[  136.881386] BTRFS info (device sdc): flagging fs with big metadata
feature
[  136.890763] BTRFS info (device sdc): creating UUID tree
[  137.023210] BTRFS error (device sdc): bdev /dev/sde errs: wr 0, rd 1,
flush 0, corrupt 0, gen 0
[  137.023959] BTRFS warning (device sdc): csum failed root 5 ino 257
off 136839168 csum 0x98f94189 expected csum 0xd9cece72 mirror 0
[  137.025349] [ cut here ]
[  137.025735] kernel BUG at fs/btrfs/extent_io.c:2104!
[  137.025800] [ cut here ]
[  137.025805] kernel BUG at fs/btrfs/extent_io.c:2104!

Thanks,

-liubo

> > > ---
> > > v2: - Fix ambiguous copyright.
> > > - Use /proc/$pid/make-it-fail to specify IO failure
> > > - Use bash -c to run test only when pid is odd.
> > > - Add test to dangerous group.
> > > 
> > >  tests/btrfs/150 | 103 
> > > 
> > >  tests/btrfs/150.out |   3 ++
> > >  tests/btrfs/group   |   1 +
> > >  3 files changed, 107 insertions(+)
> > >  create mode 100755 tests/btrfs/150
> > >  create mode 100644 tests/btrfs/150.out
> > > 
> > > diff --git a/tests/btrfs/150 b/tests/btrfs/150
> > > new file mode 100755
> > > index 000..8891c38
> > > --- /dev/null
> > > +++ b/tests/btrfs/150
> > > @@ -0,0 +1,103 @@
> > > +#! /bin/bash
> > > +# FS QA Test btrfs/150
> > > +#
> > > +# This is a regression test which ends up with a kernel oops in btrfs.
> > > +# It occurs when btrfs's read repair happens while reading a compressed
> > > +# extent.
> > > +# The patch to fix it is
> > > +#Btrfs: fix kernel oops while reading compressed data
> > > +#
> > > +#---
> > > +# Copyright (c) 2017 Oracle.  All Rights Reserved.
> > > +#
> > > +# This program is free software; you can redistribute it and/or
> > > +# modify it under the terms of the GNU General Public License as
> > > +# published by the Free Software Foundation.
> > > +#
> > > +# This program is distributed in the hope that it would be useful,
> > > +# but WITHOUT ANY WARRANTY; without even the implied warranty of
> > > +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> > > +# GNU General Public License for more details.
> > > +#
> > > +# You should have received a copy of the GNU General Public License
> > > +# along with this program; if not, write the Free Software Foundation,
> > > +# Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
> > > +#---
> > > +#
> > > +
> > > +seq=`basename $0`
> > > +seqres=$RESULT_DIR/$seq
> > > +echo "QA output created by $seq"
> > > +
> > > +here=`pwd`
> > > +tmp=/tmp/$$
> > > +status=1 # failure is the default!
> > > +trap "_cleanup; exit \$status" 0 1 2 3 15
> > > +
> > > +_cleanup()
> > > +{
> > > + cd /
> > > + rm -f $tmp.*
> > > +}
> > > +
> > > +# get standard environment, filters and checks
> > > +. ./common/rc
> > > +. ./common/filter
> > > +
> > > +# remove previous $seqres.full before test
> > > +rm -f $seqres.full
> > > +
> > > +# real QA test starts here
> > > +
> > > +# Modify as appropriate.
> > > +_supported_fs btrfs
> > > +_supported_os Linux
> > > +_require_scratch
> > > +_require_fail_make_request
> > > +_require_scratch_dev_pool 2 
> > 
> > Trailing whitespace in above line.
> > 
> > > +
> > > +SYSFS_BDEV=`_sysfs_dev $SCRATCH_DEV`
> > > +enable_io_failure()
> > > +{
> > > +echo 100 > $DEBUGFS_MNT/fail_make_request/probability
> > > +echo 

[josef-btrfs:new-kill-btree-inode 22/22] fs//btrfs/send.c:6425:22: warning: assignment makes pointer from integer without a cast

2017-09-26 Thread kbuild test robot
tree:   https://git.kernel.org/pub/scm/linux/kernel/git/josef/btrfs-next.git 
new-kill-btree-inode
head:   f2213041f761c4972696a8dabfad3c8bac9ffde2
commit: f2213041f761c4972696a8dabfad3c8bac9ffde2 [22/22] btrfs: fix send ioctl 
on 32bit with 64bit kernel
config: x86_64-randconfig-x018-201739 (attached as .config)
compiler: gcc-6 (Debian 6.2.0-3) 6.2.0 20160901
reproduce:
git checkout f2213041f761c4972696a8dabfad3c8bac9ffde2
# save the attached .config to linux build tree
make ARCH=x86_64 

All warnings (new ones prefixed by >>):

   fs//btrfs/send.c: In function 'btrfs_ioctl_send':
   fs//btrfs/send.c:6425:24: error: implicit declaration of function 
'compat_ptr' [-Werror=implicit-function-declaration]
  arg->clone_sources = compat_ptr(args32.clone_sources);
   ^~
>> fs//btrfs/send.c:6425:22: warning: assignment makes pointer from integer 
>> without a cast [-Wint-conversion]
  arg->clone_sources = compat_ptr(args32.clone_sources);
 ^
   cc1: some warnings being treated as errors

vim +6425 fs//btrfs/send.c

  6368  
  6369  long btrfs_ioctl_send(struct file *mnt_file, void __user *arg_, bool 
compat)
  6370  {
  6371  int ret = 0;
  6372  struct btrfs_root *send_root = 
BTRFS_I(file_inode(mnt_file))->root;
  6373  struct btrfs_fs_info *fs_info = send_root->fs_info;
  6374  struct btrfs_root *clone_root;
  6375  struct btrfs_ioctl_send_args *arg = NULL;
  6376  struct btrfs_key key;
  6377  struct send_ctx *sctx = NULL;
  6378  u32 i;
  6379  u64 *clone_sources_tmp = NULL;
  6380  int clone_sources_to_rollback = 0;
  6381  unsigned alloc_size;
  6382  int sort_clone_roots = 0;
  6383  int index;
  6384  
  6385  if (!capable(CAP_SYS_ADMIN))
  6386  return -EPERM;
  6387  
  6388  /*
  6389   * The subvolume must remain read-only during send, protect 
against
  6390   * making it RW. This also protects against deletion.
  6391   */
  6392  spin_lock(_root->root_item_lock);
  6393  send_root->send_in_progress++;
  6394  spin_unlock(_root->root_item_lock);
  6395  
  6396  /*
  6397   * This is done when we lookup the root, it should already be 
complete
  6398   * by the time we get here.
  6399   */
  6400  WARN_ON(send_root->orphan_cleanup_state != ORPHAN_CLEANUP_DONE);
  6401  
  6402  /*
  6403   * Userspace tools do the checks and warn the user if it's
  6404   * not RO.
  6405   */
  6406  if (!btrfs_root_readonly(send_root)) {
  6407  ret = -EPERM;
  6408  goto out;
  6409  }
  6410  
  6411  if (compat) {
  6412  struct btrfs_ioctl_send_args_32 args32;
  6413  ret = copy_from_user(, arg_, sizeof(args32));
  6414  if (ret) {
  6415  btrfs_err(fs_info, "args32 copy failed\n");
  6416  goto out;
  6417  }
  6418  arg = kzalloc(sizeof(*arg), GFP_KERNEL);
  6419  if (!arg) {
  6420  ret = -ENOMEM;
  6421  goto out;
  6422  }
  6423  arg->send_fd = args32.send_fd;
  6424  arg->clone_sources_count = args32.clone_sources_count;
> 6425  arg->clone_sources = compat_ptr(args32.clone_sources);

---
0-DAY kernel test infrastructureOpen Source Technology Center
https://lists.01.org/pipermail/kbuild-all   Intel Corporation


.config.gz
Description: application/gzip


Re: [PATCH v2] fstests: btrfs/150 regression test for reading compressed data

2017-09-26 Thread Liu Bo
On Tue, Sep 26, 2017 at 05:02:36PM +0800, Eryu Guan wrote:
> On Fri, Sep 22, 2017 at 05:21:27PM -0600, Liu Bo wrote:
> > We had a bug in btrfs compression code which could end up with a
> > kernel panic.
> > 
> > This is adding a regression test for the bug and I've also sent a
> > kernel patch to fix the bug.
> > 
> > The patch is "Btrfs: fix kernel oops while reading compressed data".
> > 
> > Signed-off-by: Liu Bo 
> 
> Hmm, I can't reproduce the panic with 4.13 kernel, which doesn't have
> the fix applied. Can you please help confirm if it panics on your test
> environment?
>

Yes, it is reproducible on my box, hrm...I'll be running it more times
to double check.

> > ---
> > v2: - Fix ambiguous copyright.
> > - Use /proc/$pid/make-it-fail to specify IO failure
> > - Use bash -c to run test only when pid is odd.
> > - Add test to dangerous group.
> > 
> >  tests/btrfs/150 | 103 
> > 
> >  tests/btrfs/150.out |   3 ++
> >  tests/btrfs/group   |   1 +
> >  3 files changed, 107 insertions(+)
> >  create mode 100755 tests/btrfs/150
> >  create mode 100644 tests/btrfs/150.out
> > 
> > diff --git a/tests/btrfs/150 b/tests/btrfs/150
> > new file mode 100755
> > index 000..8891c38
> > --- /dev/null
> > +++ b/tests/btrfs/150
> > @@ -0,0 +1,103 @@
> > +#! /bin/bash
> > +# FS QA Test btrfs/150
> > +#
> > +# This is a regression test which ends up with a kernel oops in btrfs.
> > +# It occurs when btrfs's read repair happens while reading a compressed
> > +# extent.
> > +# The patch to fix it is
> > +#  Btrfs: fix kernel oops while reading compressed data
> > +#
> > +#---
> > +# Copyright (c) 2017 Oracle.  All Rights Reserved.
> > +#
> > +# This program is free software; you can redistribute it and/or
> > +# modify it under the terms of the GNU General Public License as
> > +# published by the Free Software Foundation.
> > +#
> > +# This program is distributed in the hope that it would be useful,
> > +# but WITHOUT ANY WARRANTY; without even the implied warranty of
> > +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> > +# GNU General Public License for more details.
> > +#
> > +# You should have received a copy of the GNU General Public License
> > +# along with this program; if not, write the Free Software Foundation,
> > +# Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
> > +#---
> > +#
> > +
> > +seq=`basename $0`
> > +seqres=$RESULT_DIR/$seq
> > +echo "QA output created by $seq"
> > +
> > +here=`pwd`
> > +tmp=/tmp/$$
> > +status=1   # failure is the default!
> > +trap "_cleanup; exit \$status" 0 1 2 3 15
> > +
> > +_cleanup()
> > +{
> > +   cd /
> > +   rm -f $tmp.*
> > +}
> > +
> > +# get standard environment, filters and checks
> > +. ./common/rc
> > +. ./common/filter
> > +
> > +# remove previous $seqres.full before test
> > +rm -f $seqres.full
> > +
> > +# real QA test starts here
> > +
> > +# Modify as appropriate.
> > +_supported_fs btrfs
> > +_supported_os Linux
> > +_require_scratch
> > +_require_fail_make_request
> > +_require_scratch_dev_pool 2 
> 
> Trailing whitespace in above line.
> 
> > +
> > +SYSFS_BDEV=`_sysfs_dev $SCRATCH_DEV`
> > +enable_io_failure()
> > +{
> > +echo 100 > $DEBUGFS_MNT/fail_make_request/probability
> > +echo 1000 > $DEBUGFS_MNT/fail_make_request/times
> > +echo 0 > $DEBUGFS_MNT/fail_make_request/verbose
> > +echo 1 > $SYSFS_BDEV/make-it-fail
> > +}
> > +
> > +disable_io_failure()
> > +{
> > +echo 0 > $SYSFS_BDEV/make-it-fail
> > +echo 0 > $DEBUGFS_MNT/fail_make_request/probability
> > +echo 0 > $DEBUGFS_MNT/fail_make_request/times
> > +}
> > +
> > +_scratch_pool_mkfs "-d raid1 -b 1G" >> $seqres.full 2>&1
> > +
> > +# It doesn't matter which compression algorithm we use.
> > +_scratch_mount -ocompress
> > +
> > +# Create a file with all data being compressed
> > +$XFS_IO_PROG -f -c "pwrite -W 0 8K" $SCRATCH_MNT/foobar | _filter_xfs_io
> > +
> > +# Raid1 consists of two copies and btrfs decides which copy to read by 
> > reader's
> > +# %pid.  Now we inject errors to copy #1 and copy #0 is good.  We want to 
> > read
> > +# the bad copy to trigger read-repair.
> > +while [[ -z $result ]]; do
> > +   # invalidate the page cache
> > +   $XFS_IO_PROG -f -c "fadvise -d 0 8K" $SCRATCH_MNT/foobar
> 
> Does 'echo 3 > /proc/sys/vm/drop_caches' work?
>

Yes, it works, drop_caches is system-wide, while here I'm just
dropping caches on this single inode.

Or are you implying that it's 'fadvise' that makes the test fail to
show oops?

thanks,

-liubo

> Thanks,
> Eryu
> 
> > +
> > +   enable_io_failure
> > +
> > +   result=$(bash -c "
> > +   if [ \$((\$\$ % 2)) == 1 ]; then
> > +   echo 1 > /proc/\$\$/make-it-fail
> > +   exec $XFS_IO_PROG -c \"pread 0 

[josef-btrfs:new-kill-btree-inode 22/22] fs/btrfs/send.c:6412:35: error: storage size of 'args32' isn't known

2017-09-26 Thread kbuild test robot
tree:   https://git.kernel.org/pub/scm/linux/kernel/git/josef/btrfs-next.git 
new-kill-btree-inode
head:   f2213041f761c4972696a8dabfad3c8bac9ffde2
commit: f2213041f761c4972696a8dabfad3c8bac9ffde2 [22/22] btrfs: fix send ioctl 
on 32bit with 64bit kernel
config: i386-randconfig-x000-201739 (attached as .config)
compiler: gcc-6 (Debian 6.2.0-3) 6.2.0 20160901
reproduce:
git checkout f2213041f761c4972696a8dabfad3c8bac9ffde2
# save the attached .config to linux build tree
make ARCH=i386 

All errors (new ones prefixed by >>):

   fs/btrfs/send.c: In function 'btrfs_ioctl_send':
>> fs/btrfs/send.c:6412:35: error: storage size of 'args32' isn't known
  struct btrfs_ioctl_send_args_32 args32;
  ^~
>> fs/btrfs/send.c:6425:24: error: implicit declaration of function 
>> 'compat_ptr' [-Werror=implicit-function-declaration]
  arg->clone_sources = compat_ptr(args32.clone_sources);
   ^~
   fs/btrfs/send.c:6412:35: warning: unused variable 'args32' 
[-Wunused-variable]
  struct btrfs_ioctl_send_args_32 args32;
  ^~
   cc1: some warnings being treated as errors

vim +6412 fs/btrfs/send.c

  6368  
  6369  long btrfs_ioctl_send(struct file *mnt_file, void __user *arg_, bool 
compat)
  6370  {
  6371  int ret = 0;
  6372  struct btrfs_root *send_root = 
BTRFS_I(file_inode(mnt_file))->root;
  6373  struct btrfs_fs_info *fs_info = send_root->fs_info;
  6374  struct btrfs_root *clone_root;
  6375  struct btrfs_ioctl_send_args *arg = NULL;
  6376  struct btrfs_key key;
  6377  struct send_ctx *sctx = NULL;
  6378  u32 i;
  6379  u64 *clone_sources_tmp = NULL;
  6380  int clone_sources_to_rollback = 0;
  6381  unsigned alloc_size;
  6382  int sort_clone_roots = 0;
  6383  int index;
  6384  
  6385  if (!capable(CAP_SYS_ADMIN))
  6386  return -EPERM;
  6387  
  6388  /*
  6389   * The subvolume must remain read-only during send, protect 
against
  6390   * making it RW. This also protects against deletion.
  6391   */
  6392  spin_lock(_root->root_item_lock);
  6393  send_root->send_in_progress++;
  6394  spin_unlock(_root->root_item_lock);
  6395  
  6396  /*
  6397   * This is done when we lookup the root, it should already be 
complete
  6398   * by the time we get here.
  6399   */
  6400  WARN_ON(send_root->orphan_cleanup_state != ORPHAN_CLEANUP_DONE);
  6401  
  6402  /*
  6403   * Userspace tools do the checks and warn the user if it's
  6404   * not RO.
  6405   */
  6406  if (!btrfs_root_readonly(send_root)) {
  6407  ret = -EPERM;
  6408  goto out;
  6409  }
  6410  
  6411  if (compat) {
> 6412  struct btrfs_ioctl_send_args_32 args32;
  6413  ret = copy_from_user(, arg_, sizeof(args32));
  6414  if (ret) {
  6415  btrfs_err(fs_info, "args32 copy failed\n");
  6416  goto out;
  6417  }
  6418  arg = kzalloc(sizeof(*arg), GFP_KERNEL);
  6419  if (!arg) {
  6420  ret = -ENOMEM;
  6421  goto out;
  6422  }
  6423  arg->send_fd = args32.send_fd;
  6424  arg->clone_sources_count = args32.clone_sources_count;
> 6425  arg->clone_sources = compat_ptr(args32.clone_sources);

---
0-DAY kernel test infrastructureOpen Source Technology Center
https://lists.01.org/pipermail/kbuild-all   Intel Corporation


.config.gz
Description: application/gzip


Re: dmesg: csum "varying number" expected csum 0x0 mirror 1 (with trace/oops)

2017-09-26 Thread Liu Bo
On Wed, Sep 27, 2017 at 12:21:55AM +0200, Kai Krakow wrote:
> Hello!
> 
> I came across noting some kernel messages which seem to be related to
> btrfs, should I worry?
> 
> I'm currently running scrub on the device now.
> 
> inode-resolve points to an unimportant, easily recoverable file:
> 
> $ sudo btrfs inspect-internal inode-resolve -v 1229624 
> /mnt/btrfs-pool/gentoo/usr/portage/
> ioctl ret=0, bytes_left=4033, bytes_missing=0, cnt=1, missed=0
> /mnt/btrfs-pool/gentoo/usr/portage//packages/dev-lang/tcl/tcl-8.6.6-2.xpak
> 
> The file wasn't modified in month and it has been in order previously
> because the backup didn't choke on it:
> 
> $ ls -alt
> -rw-r--r-- 1 root root 11241835 19. Apr 22:11 
> /mnt/btrfs-pool/gentoo/usr/portage//packages/dev-lang/tcl/tcl-8.6.6-2.xpak
> 
> I can read the file without problems, no new messages in dmesg:
> 
> $ cat 
> /mnt/btrfs-pool/gentoo/usr/portage//packages/dev-lang/tcl/tcl-8.6.6-2.xpak 
> >/dev/null
> 
> What's strange is the long time gaps between the btrfs reports and the
> kernel backtraces... Also, expected csum=0 looks strange.
> 
> That mount is subvol_id=0. Not sure if inode-resolve works that way. I
> retried for the important subvolumes and it resolved for none.
> 
> Just in case, I have a backlog of multiple daily backups.
> 
> $ uname -a
> Linux jupiter 4.12.14-ck #1 SMP PREEMPT Fri Sep 22 02:47:44 CEST 2017
> x86_64 Intel(R) Core(TM) i7-3770K CPU @ 3.50GHz GenuineIntel GNU/Linux
> 
> 
> [88597.792462] BTRFS info (device bcache48): no csum found for inode 1229624 
> start 1650688
> [88597.793304] BTRFS warning (device bcache48): csum failed root 280 ino 
> 1229624 off 1650688 csum 0x953c1e92 expected csum 0x mirror 1
> [128569.451376] [ cut here ]
> [128569.451382] WARNING: CPU: 7 PID: 146 at kernel/workqueue.c:2041 
> process_one_work+0x44/0x310
> [128569.451383] Modules linked in: cifs ccm fuse bridge stp llc veth rfcomm 
> bnep cachefiles btusb af_packet btintel iTCO_wdt bluetooth tun 
> iTCO_vendor_support rfkill ecdh_generic kvm_intel snd_hda_codec_hdmi 
> snd_hda_codec_realtek kvm snd_hda_codec_generic snd_hda_intel snd_hda_codec 
> snd_hda_core rtc_cmos snd_pcm snd_timer snd soundcore lpc_ich irqbypass uas 
> usb_storage r8168(O) nvidia_drm(PO) vboxpci(O) vboxnetadp(O) vboxnetflt(O) 
> vboxdrv(O) nvidia_modeset(PO) nvidia(PO) nct6775 hwmon_vid coretemp hwmon 
> efivarfs
> [128569.451406] CPU: 7 PID: 146 Comm: bcache Tainted: P   O
> 4.12.14-ck #1
> [128569.451407] Hardware name: To Be Filled By O.E.M. To Be Filled By 
> O.E.M./Z68 Pro3, BIOS L2.16A 02/22/2013
> [128569.451410] task: 880419bf8bc0 task.stack: c95c4000
> [128569.451412] RIP: 0010:process_one_work+0x44/0x310
> [128569.451412] RSP: :c95c7e78 EFLAGS: 00013002
> [128569.451413] RAX: 0007 RBX: 880419bdcf00 RCX: 
> 88042f2d8020
> [128569.451414] RDX: 88042f2d8018 RSI: 880120454f08 RDI: 
> 880419bdcf00
> [128569.451415] RBP: 88042f2d8000 R08:  R09: 
> 
> [128569.451415] R10: 8a209a609e79 R11:  R12: 
> 
> [128569.451416] R13: 88042f2e1b00 R14: 88042f2e1b80 R15: 
> 880419bdcf30
> [128569.451417] FS:  () GS:88042f3c() 
> knlGS:
> [128569.451418] CS:  0010 DS:  ES:  CR0: 80050033
> [128569.451418] CR2: 00b0 CR3: 0003f23ef000 CR4: 
> 001406e0
> [128569.451419] Call Trace:
> [128569.451422]  ? rescuer_thread+0x20b/0x370
> [128569.451424]  ? kthread+0xf2/0x130
> [128569.451425]  ? process_one_work+0x310/0x310
> [128569.451426]  ? kthread_create_on_node+0x40/0x40
> [128569.451428]  ? ret_from_fork+0x22/0x30
> [128569.451429] Code: 04 b8 00 00 00 00 4c 0f 44 e8 49 8b 45 08 44 8b a0 00 
> 01 00 00 41 83 e4 20 f6 45 10 04 75 0e 65 8b 05 79 38 f7 7e 3b 45 04 74 02 
> <0f> ff 48 ba eb 83 b5 80 46 86 c8 61 48 0f af d6 48 c1 ea 3a 48 
> [128569.451449] ---[ end trace d00a1585e5166d18 ]---
> [148997.934146] BUG: unable to handle kernel paging request at 
> c96af000
> [148997.934154] IP: memcpy_erms+0x6/0x10
> [148997.934155] PGD 41d021067
> [148997.934156] P4D 41d021067
> [148997.934157] PUD 41d022067
> [148997.934158] PMD 41961c067
> [148997.934158] PTE 0
> [148997.934162] Oops: 0002 [#1] PREEMPT SMP
> [148997.934163] Modules linked in: cifs ccm fuse bridge stp llc veth rfcomm 
> bnep cachefiles btusb af_packet btintel iTCO_wdt bluetooth tun 
> iTCO_vendor_support rfkill ecdh_generic kvm_intel snd_hda_codec_hdmi 
> snd_hda_codec_realtek kvm snd_hda_codec_generic snd_hda_intel snd_hda_codec 
> snd_hda_core rtc_cmos snd_pcm snd_timer snd soundcore lpc_ich irqbypass uas 
> usb_storage r8168(O) nvidia_drm(PO) vboxpci(O) vboxnetadp(O) vboxnetflt(O) 
> vboxdrv(O) nvidia_modeset(PO) nvidia(PO) nct6775 hwmon_vid coretemp hwmon 
> efivarfs
> [148997.934188] CPU: 6 PID: 966 Comm: kworker/u16:16 Tainted: PW  O   
>  4.12.14-ck 

Re: 4.13.3 still has the out of space kernel oops

2017-09-26 Thread Kai Krakow
Am Wed, 27 Sep 2017 00:45:04 +0200
schrieb Ian Kumlien :

> I just had my laptop hit the out of space kernel oops which it kinda
> hard to recover from
> 
> Everything states "out of disk" even with 20 gigs free (both according
> to df and btrfs fi df)

You should run balance from time to time. I can suggest the auto
balance script from here:

https://www.spinics.net/lists/linux-btrfs/msg52076.html

It can be run unattended on a regular basis.


> So I'm suspecting that i need to run btrfs check on it to recover the
> lost space (i have mounted it with clear_cache and nothing)

I don't think that "btrfs check" would recover free space, that's not a
file system corruption, it's an allocation issue due to unbalanced
chunks.


> The problem is, finally getting a shell with rd.shell rd.break=cmdline
> - systemd is still a pain and since it's "udev" it's not allowing me
> to do cryptsetup luksopen due to "dependencies"

Does "emergency" as a cmdline work? It should boot to emergency mode of
systemd. Also, "recovery" as a cmdline could work, boots to a different
mode. Both work for me using dracut on Gentoo with systemd.


> Basically, btrfs check should be able to run on a ro mounted
> fileystem, this is too hard to get working without having to bring a
> live usb stick atm

I think this is possible in the latest version but only running in
non-repair mode.


-- 
Regards,
Kai

Replies to list-only preferred.

--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: Give up on bcache?

2017-09-26 Thread Ferry Toth
Op Tue, 26 Sep 2017 15:52:44 -0400, schreef Austin S. Hemmelgarn:

> On 2017-09-26 12:50, Ferry Toth wrote:
>> Looking at the Phoronix benchmark here:
>> 
>> https://www.phoronix.com/scan.php?page=article=linux414-bcache-
>> raid=2
>> 
>> I think it might be idle hopes to think bcache can be used as a ssd
>> cache for btrfs to significantly improve performance.. True, the
>> benchmark is using ext.
> It's a benchmark.  They're inherently synthetic and workload specific,
> and therefore should not be trusted to represent things accurately for
> arbitrary use cases.

So what. A decent benchmark tries to measure a specific aspect of the fs.

I think you agree that applications doing lots of fsyncs (databases, 
dpkg) are slow on btrfs especially on hdd's, whatever way you measure 
that (it feels slow, it measures slow, it really is slow).

On a ssd the problem is less.

So if you can fix that by using a ssd cache or a hybrid solution, how 
would you like to compare that? It _feels_ faster?

>> But the most important one (where btrfs always shows to be a little
>> slow)
>> would be the SQLLite test. And with ext at least performance _degrades_
>> except for the Writeback mode, and even there is nowhere near what the
>> SSD is capable of.
> And what makes you think it will be?  You're using it as a hot-data
> cache, not a dedicated write-back cache, and you have the overhead from
> bcache itself too.  Just some simple math based on examining the bcache
> code suggests you can't get better than about 98% of the SSD's
> performance if you're lucky, and I'd guess it's more like 80% most of
> the time.
>> 
>> I think with btrfs it will be even worse and that it is a fundamental
>> problem: caching is complex and the cache can not how how the data on
>> the fs is used.
> Actually, the improvement from using bcache with BTRFS is higher
> proportionate to the baseline of not using it by a small margin than it
> is when used with ext4.  BTRFS does a lot more with the disk, so you
> have a lot more time spent accessing the disk, and thus more time that
> can be reduced by improving disk performance.  While the CoW nature of
> BTRFS does somewhat mitigate the performance improvement from using
> bcache, it does not completely negate it.

I would like to reverse this, how much degradation do you suffer from 
btrfs on a ssd as baseline compared to btrfs on a mixed ssd/hdd system.

IMHO you are hoping to get ssd performance at hdd cost.  

>> I think the original idea of hot data tracking has a much better chance
>> to significantly improve performance. This of course as the SSD's and
>> HDD's then will be equal citizens and btrfs itself gets to decide on
>> which drive the data is best stored.
> First, the user needs to decide, not BTRFS (at least, by default, BTRFS
> should not be involved in the decision).  Second, tiered storage (that's
> what that's properly called) is mostly orthogonal to caching (though
> bcache and dm-cache behave like tiered storage once the cache is
> warmed).

So, on your desktop you really are going to seach for all sqllite, mysql 
and psql files, dpkg files etc. and move them to the ssd? You can already 
do that. Go ahead! 

The big win would be if the file system does that automatically for you.

>> With this implemented right, it would also finally silence the never
>> ending discussion why not btrfs and why zfs, ext, xfs etc. Which would
>> be a plus by its own right.
> Even with this, there would still be plenty of reasons to pick one of
> those filesystems over BTRFS.  There would however be one more reason to
> pick BTRFS over ext or XFS (but necessarily not ZFS, it already has
> caching built in).

Exactly, one more advantage of btrfs and one less of zfs.

--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


4.13.3 still has the out of space kernel oops

2017-09-26 Thread Ian Kumlien
Hi,

I just had my laptop hit the out of space kernel oops which it kinda
hard to recover from

Everything states "out of disk" even with 20 gigs free (both according
to df and btrfs fi df)

So I'm suspecting that i need to run btrfs check on it to recover the
lost space (i have mounted it with clear_cache and nothing)

The problem is, finally getting a shell with rd.shell rd.break=cmdline
- systemd is still a pain and since it's "udev" it's not allowing me
to do cryptsetup luksopen due to "dependencies"

Basically, btrfs check should be able to run on a ro mounted
fileystem, this is too hard to get working without having to bring a
live usb stick atm

=/
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


dmesg: csum "varying number" expected csum 0x0 mirror 1 (with trace/oops)

2017-09-26 Thread Kai Krakow
Hello!

I came across noting some kernel messages which seem to be related to
btrfs, should I worry?

I'm currently running scrub on the device now.

inode-resolve points to an unimportant, easily recoverable file:

$ sudo btrfs inspect-internal inode-resolve -v 1229624 
/mnt/btrfs-pool/gentoo/usr/portage/
ioctl ret=0, bytes_left=4033, bytes_missing=0, cnt=1, missed=0
/mnt/btrfs-pool/gentoo/usr/portage//packages/dev-lang/tcl/tcl-8.6.6-2.xpak

The file wasn't modified in month and it has been in order previously
because the backup didn't choke on it:

$ ls -alt
-rw-r--r-- 1 root root 11241835 19. Apr 22:11 
/mnt/btrfs-pool/gentoo/usr/portage//packages/dev-lang/tcl/tcl-8.6.6-2.xpak

I can read the file without problems, no new messages in dmesg:

$ cat 
/mnt/btrfs-pool/gentoo/usr/portage//packages/dev-lang/tcl/tcl-8.6.6-2.xpak 
>/dev/null

What's strange is the long time gaps between the btrfs reports and the
kernel backtraces... Also, expected csum=0 looks strange.

That mount is subvol_id=0. Not sure if inode-resolve works that way. I
retried for the important subvolumes and it resolved for none.

Just in case, I have a backlog of multiple daily backups.

$ uname -a
Linux jupiter 4.12.14-ck #1 SMP PREEMPT Fri Sep 22 02:47:44 CEST 2017
x86_64 Intel(R) Core(TM) i7-3770K CPU @ 3.50GHz GenuineIntel GNU/Linux


[88597.792462] BTRFS info (device bcache48): no csum found for inode 1229624 
start 1650688
[88597.793304] BTRFS warning (device bcache48): csum failed root 280 ino 
1229624 off 1650688 csum 0x953c1e92 expected csum 0x mirror 1
[128569.451376] [ cut here ]
[128569.451382] WARNING: CPU: 7 PID: 146 at kernel/workqueue.c:2041 
process_one_work+0x44/0x310
[128569.451383] Modules linked in: cifs ccm fuse bridge stp llc veth rfcomm 
bnep cachefiles btusb af_packet btintel iTCO_wdt bluetooth tun 
iTCO_vendor_support rfkill ecdh_generic kvm_intel snd_hda_codec_hdmi 
snd_hda_codec_realtek kvm snd_hda_codec_generic snd_hda_intel snd_hda_codec 
snd_hda_core rtc_cmos snd_pcm snd_timer snd soundcore lpc_ich irqbypass uas 
usb_storage r8168(O) nvidia_drm(PO) vboxpci(O) vboxnetadp(O) vboxnetflt(O) 
vboxdrv(O) nvidia_modeset(PO) nvidia(PO) nct6775 hwmon_vid coretemp hwmon 
efivarfs
[128569.451406] CPU: 7 PID: 146 Comm: bcache Tainted: P   O
4.12.14-ck #1
[128569.451407] Hardware name: To Be Filled By O.E.M. To Be Filled By 
O.E.M./Z68 Pro3, BIOS L2.16A 02/22/2013
[128569.451410] task: 880419bf8bc0 task.stack: c95c4000
[128569.451412] RIP: 0010:process_one_work+0x44/0x310
[128569.451412] RSP: :c95c7e78 EFLAGS: 00013002
[128569.451413] RAX: 0007 RBX: 880419bdcf00 RCX: 
88042f2d8020
[128569.451414] RDX: 88042f2d8018 RSI: 880120454f08 RDI: 
880419bdcf00
[128569.451415] RBP: 88042f2d8000 R08:  R09: 

[128569.451415] R10: 8a209a609e79 R11:  R12: 

[128569.451416] R13: 88042f2e1b00 R14: 88042f2e1b80 R15: 
880419bdcf30
[128569.451417] FS:  () GS:88042f3c() 
knlGS:
[128569.451418] CS:  0010 DS:  ES:  CR0: 80050033
[128569.451418] CR2: 00b0 CR3: 0003f23ef000 CR4: 
001406e0
[128569.451419] Call Trace:
[128569.451422]  ? rescuer_thread+0x20b/0x370
[128569.451424]  ? kthread+0xf2/0x130
[128569.451425]  ? process_one_work+0x310/0x310
[128569.451426]  ? kthread_create_on_node+0x40/0x40
[128569.451428]  ? ret_from_fork+0x22/0x30
[128569.451429] Code: 04 b8 00 00 00 00 4c 0f 44 e8 49 8b 45 08 44 8b a0 00 01 
00 00 41 83 e4 20 f6 45 10 04 75 0e 65 8b 05 79 38 f7 7e 3b 45 04 74 02 <0f> ff 
48 ba eb 83 b5 80 46 86 c8 61 48 0f af d6 48 c1 ea 3a 48 
[128569.451449] ---[ end trace d00a1585e5166d18 ]---
[148997.934146] BUG: unable to handle kernel paging request at c96af000
[148997.934154] IP: memcpy_erms+0x6/0x10
[148997.934155] PGD 41d021067
[148997.934156] P4D 41d021067
[148997.934157] PUD 41d022067
[148997.934158] PMD 41961c067
[148997.934158] PTE 0
[148997.934162] Oops: 0002 [#1] PREEMPT SMP
[148997.934163] Modules linked in: cifs ccm fuse bridge stp llc veth rfcomm 
bnep cachefiles btusb af_packet btintel iTCO_wdt bluetooth tun 
iTCO_vendor_support rfkill ecdh_generic kvm_intel snd_hda_codec_hdmi 
snd_hda_codec_realtek kvm snd_hda_codec_generic snd_hda_intel snd_hda_codec 
snd_hda_core rtc_cmos snd_pcm snd_timer snd soundcore lpc_ich irqbypass uas 
usb_storage r8168(O) nvidia_drm(PO) vboxpci(O) vboxnetadp(O) vboxnetflt(O) 
vboxdrv(O) nvidia_modeset(PO) nvidia(PO) nct6775 hwmon_vid coretemp hwmon 
efivarfs
[148997.934188] CPU: 6 PID: 966 Comm: kworker/u16:16 Tainted: PW  O
4.12.14-ck #1
[148997.934190] Hardware name: To Be Filled By O.E.M. To Be Filled By 
O.E.M./Z68 Pro3, BIOS L2.16A 02/22/2013
[148997.934193] Workqueue: btrfs-endio btrfs_endio_helper
[148997.934194] task: 88001496a340 task.stack: c90011b68000
[148997.934196] RIP: 

[PATCH] btrfs: fix send ioctl on 32bit with 64bit kernel

2017-09-26 Thread Josef Bacik
We pass in a pointer in our send arg struct, this means the struct size doesn't
match with 32bit user space and 64bit kernel space.  Fix this by adding a compat
mode and doing the appropriate conversion.

Signed-off-by: Josef Bacik 
---
 fs/btrfs/ioctl.c   |  6 +-
 fs/btrfs/send.c| 34 --
 fs/btrfs/send.h|  2 +-
 include/uapi/linux/btrfs.h | 13 +
 4 files changed, 47 insertions(+), 8 deletions(-)

diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 050d2d9c5533..9169d67e49b9 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -5594,7 +5594,11 @@ long btrfs_ioctl(struct file *file, unsigned int
return btrfs_ioctl_set_received_subvol_32(file, argp);
 #endif
case BTRFS_IOC_SEND:
-   return btrfs_ioctl_send(file, argp);
+   return btrfs_ioctl_send(file, argp, false);
+#ifdef CONFIG_64BIT
+   case BTRFS_IOC_SEND_32:
+   return btrfs_ioctl_send(file, argp, true);
+#endif
case BTRFS_IOC_GET_DEV_STATS:
return btrfs_ioctl_get_dev_stats(fs_info, argp);
case BTRFS_IOC_QUOTA_CTL:
diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c
index 32b043ef8ac9..2041cac1875a 100644
--- a/fs/btrfs/send.c
+++ b/fs/btrfs/send.c
@@ -26,6 +26,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include "send.h"
 #include "backref.h"
@@ -6365,7 +6366,7 @@ static void btrfs_root_dec_send_in_progress(struct 
btrfs_root* root)
spin_unlock(>root_item_lock);
 }
 
-long btrfs_ioctl_send(struct file *mnt_file, void __user *arg_)
+long btrfs_ioctl_send(struct file *mnt_file, void __user *arg_, bool compat)
 {
int ret = 0;
struct btrfs_root *send_root = BTRFS_I(file_inode(mnt_file))->root;
@@ -6407,11 +6408,32 @@ long btrfs_ioctl_send(struct file *mnt_file, void 
__user *arg_)
goto out;
}
 
-   arg = memdup_user(arg_, sizeof(*arg));
-   if (IS_ERR(arg)) {
-   ret = PTR_ERR(arg);
-   arg = NULL;
-   goto out;
+   if (compat) {
+   struct btrfs_ioctl_send_args_32 args32;
+   ret = copy_from_user(, arg_, sizeof(args32));
+   if (ret) {
+   btrfs_err(fs_info, "args32 copy failed\n");
+   goto out;
+   }
+   arg = kzalloc(sizeof(*arg), GFP_KERNEL);
+   if (!arg) {
+   ret = -ENOMEM;
+   goto out;
+   }
+   arg->send_fd = args32.send_fd;
+   arg->clone_sources_count = args32.clone_sources_count;
+   arg->clone_sources = compat_ptr(args32.clone_sources);
+   arg->parent_root = args32.parent_root;
+   arg->flags = args32.flags;
+   memcpy(arg->reserved, args32.reserved,
+  sizeof(args32.reserved));
+   } else {
+   arg = memdup_user(arg_, sizeof(*arg));
+   if (IS_ERR(arg)) {
+   ret = PTR_ERR(arg);
+   arg = NULL;
+   goto out;
+   }
}
 
/*
diff --git a/fs/btrfs/send.h b/fs/btrfs/send.h
index 02e00166c4da..d45d2471c4b6 100644
--- a/fs/btrfs/send.h
+++ b/fs/btrfs/send.h
@@ -130,5 +130,5 @@ enum {
 #define BTRFS_SEND_A_MAX (__BTRFS_SEND_A_MAX - 1)
 
 #ifdef __KERNEL__
-long btrfs_ioctl_send(struct file *mnt_file, void __user *arg);
+long btrfs_ioctl_send(struct file *mnt_file, void __user *arg, bool compat);
 #endif
diff --git a/include/uapi/linux/btrfs.h b/include/uapi/linux/btrfs.h
index 378230c163d5..50b201222cfc 100644
--- a/include/uapi/linux/btrfs.h
+++ b/include/uapi/linux/btrfs.h
@@ -719,6 +719,19 @@ struct btrfs_ioctl_send_args {
__u64 reserved[4];  /* in */
 };
 
+#ifdef CONFIG_64BIT
+struct btrfs_ioctl_send_args_32 {
+   __s64 send_fd;  /* in */
+   __u64 clone_sources_count;  /* in */
+   __u32 clone_sources;/* in */
+   __u64 parent_root;  /* in */
+   __u64 flags;/* in */
+   __u64 reserved[4];  /* in */
+} __attribute__ ((__packed__));
+#define BTRFS_IOC_SEND_32 _IOW(BTRFS_IOCTL_MAGIC, 38, \
+  struct btrfs_ioctl_send_args_32)
+#endif
+
 /* Error codes as returned by the kernel */
 enum btrfs_err_code {
BTRFS_ERROR_DEV_RAID1_MIN_NOT_MET = 1,
-- 
2.7.4

--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: Btrfs performance with small blocksize on SSD

2017-09-26 Thread Peter Grandi
> i run a few performance tests comparing mdadm, hardware raid
> and the btrfs raid.

Fantastic beginning already! :-)

> I noticed that the performance

I have seen over the years a lot of messages like this where
there is a wanton display of amusing misuses of terminology, of
which the misuse of the word "performance" to mean "speed" is
common, and your results are work-per-time which is a "speed":
http://www.sabi.co.uk/blog/15-two.html?151023#151023

The "tl;dr" is: you and another guy are told to race the 100m to
win a €10,000 prize, but you have to carry a sack with a 50Kg
weight. It takes you a lot longer, as your speed is much lower,
and the other guy gets the prize. Was that because your
performance was much worse? :-)

> for small blocksizes (2k) is very bad on SSD in general and on
> HDD for sequential writing.

Your graphs show pretty decent performance for small-file IO on
Btrfs, depending on conditions, and you are very astutely not
explaining the conditions, even if some can be guessed.

> I wonder about that result, because you say on the wiki that
> btrfs is very effective for small files.

Effectivess/efficiency are not the same as performance or speed
either. My own simplistic but somewhat meaningful tests show
that Btrfs does relatively well on small files:

  http://www.sabi.co.uk/blog/17-one.html?170302#170302

As to "small files" in general I have read about many attempts
to use filesystems as DBMSes, and I consider them intensely
stupid:

  http://www.sabi.co.uk/blog/anno05-4th.html?051016#051016

> I attached my results from raid 1 random write HDD (rH1), SSD
> (rS1) and from sequential write HDD (sH1), SSD (sS1)

Ah, so it was specifically about small *writes* (and presumably
because of other wording not small-updates-in-place of large
files, but creating and writing small files).

It is a very basic beginner level notion that most storage
systems are very anisotropic as to IO size, and also for read
vs. write, and never mind with and without 'fsync'. SSDs without
supercapacitor backed buffers in particular are an issue.

Btrfs has a performance envelope where the speed of small writes
(in particular small in-place updates, but also because of POSIX
small file creation) has been sacrificed for good reasons:

https://btrfs.wiki.kernel.org/index.php/SysadminGuide#Copy_on_Write_.28CoW.29
https://btrfs.wiki.kernel.org/index.php/Gotchas#Fragmentation

Also consider the consequences of the 'max_inline' option for
'mount' and the 'nodesize' option for 'mkfs.btrfs'.

> Hopefully you have an explanation for that.

The best explanation seems to me (euphemism alert) quite
extensive "misknowledge" in the message I am responding to.
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: Give up on bcache?

2017-09-26 Thread Adam Borowski
On Tue, Sep 26, 2017 at 11:33:19PM +0500, Roman Mamedov wrote:
> On Tue, 26 Sep 2017 16:50:00 + (UTC)
> Ferry Toth  wrote:
> 
> > https://www.phoronix.com/scan.php?page=article=linux414-bcache-
> > raid=2
> > 
> > I think it might be idle hopes to think bcache can be used as a ssd cache 
> > for btrfs to significantly improve performance..
> 
> My personal real-world experience shows that SSD caching -- with lvmcache --
> does indeed significantly improve performance of a large Btrfs filesystem with
> slowish base storage.
> 
> And that article, sadly, only demonstrates once again the general mediocre
> quality of Phoronix content: it is an astonishing oversight to not check out
> lvmcache in the same setup, to at least try to draw some useful conclusion, is
> it Bcache that is strangely deficient, or SSD caching as a general concept
> does not work well in the hardware setup utilized.

Also, it looks as if Phoronix' tests don't stress metadata at all.  Btrfs is
all about metadata, speeding it up greatly helps most workloads.

A pipe-dream wishlist would be:
* store and access master copy of metadata on SSD only
* pin all data blocks referenced by generations not yet mirrored
* slowly copy over metadata to HDD

-- 
⢀⣴⠾⠻⢶⣦⠀ We domesticated dogs 36000 years ago; together we chased
⣾⠁⢰⠒⠀⣿⡁ animals, hung out and licked or scratched our private parts.
⢿⡄⠘⠷⠚⠋⠀ Cats domesticated us 9500 years ago, and immediately we got
⠈⠳⣄ agriculture, towns then cities. -- whitroth on /.
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: Give up on bcache?

2017-09-26 Thread Austin S. Hemmelgarn

On 2017-09-26 12:50, Ferry Toth wrote:

Looking at the Phoronix benchmark here:

https://www.phoronix.com/scan.php?page=article=linux414-bcache-
raid=2

I think it might be idle hopes to think bcache can be used as a ssd cache
for btrfs to significantly improve performance.. True, the benchmark is
using ext.
It's a benchmark.  They're inherently synthetic and workload specific, 
and therefore should not be trusted to represent things accurately for 
arbitrary use cases.


But the most important one (where btrfs always shows to be a little slow)
would be the SQLLite test. And with ext at least performance _degrades_
except for the Writeback mode, and even there is nowhere near what the
SSD is capable of.
And what makes you think it will be?  You're using it as a hot-data 
cache, not a dedicated write-back cache, and you have the overhead from 
bcache itself too.  Just some simple math based on examining the bcache 
code suggests you can't get better than about 98% of the SSD's 
performance if you're lucky, and I'd guess it's more like 80% most of 
the time.


I think with btrfs it will be even worse and that it is a fundamental
problem: caching is complex and the cache can not how how the data on the
fs is used.
Actually, the improvement from using bcache with BTRFS is higher 
proportionate to the baseline of not using it by a small margin than it 
is when used with ext4.  BTRFS does a lot more with the disk, so you 
have a lot more time spent accessing the disk, and thus more time that 
can be reduced by improving disk performance.  While the CoW nature of 
BTRFS does somewhat mitigate the performance improvement from using 
bcache, it does not completely negate it.


I think the original idea of hot data tracking has a much better chance
to significantly improve performance. This of course as the SSD's and
HDD's then will be equal citizens and btrfs itself gets to decide on
which drive the data is best stored.
First, the user needs to decide, not BTRFS (at least, by default, BTRFS 
should not be involved in the decision).  Second, tiered storage (that's 
what that's properly called) is mostly orthogonal to caching (though 
bcache and dm-cache behave like tiered storage once the cache is warmed).


With this implemented right, it would also finally silence the never
ending discussion why not btrfs and why zfs, ext, xfs etc. Which would be
a plus by its own right.
Even with this, there would still be plenty of reasons to pick one of 
those filesystems over BTRFS.  There would however be one more reason to 
pick BTRFS over ext or XFS (but necessarily not ZFS, it already has 
caching built in).


--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: Give up on bcache?

2017-09-26 Thread Kai Krakow
Am Tue, 26 Sep 2017 23:33:19 +0500
schrieb Roman Mamedov :

> On Tue, 26 Sep 2017 16:50:00 + (UTC)
> Ferry Toth  wrote:
> 
> > https://www.phoronix.com/scan.php?page=article=linux414-bcache-
> > raid=2
> > 
> > I think it might be idle hopes to think bcache can be used as a ssd
> > cache for btrfs to significantly improve performance..  
> 
> My personal real-world experience shows that SSD caching -- with
> lvmcache -- does indeed significantly improve performance of a large
> Btrfs filesystem with slowish base storage.
> 
> And that article, sadly, only demonstrates once again the general
> mediocre quality of Phoronix content: it is an astonishing oversight
> to not check out lvmcache in the same setup, to at least try to draw
> some useful conclusion, is it Bcache that is strangely deficient, or
> SSD caching as a general concept does not work well in the hardware
> setup utilized.

Bcache is actually not meant to increase benchmark performance except
for very few corner cases. It is designed to improve interactivity and
perceived performance, reducing head movements. On the bcache homepage
there's actually tips on how to benchmark bcache correctly, including
warm-up phase and turning on sequential caching. Phoronix doesn't do
that, they test default settings, which is imho a good thing but you
should know the consequences and research how to turn the knobs.

Depending on the caching mode and cache size, the SQlite test may not
show real-world numbers. Also, you should optimize some btrfs options
to work correctly with bcache, e.g. force it to mount "nossd" as it
detects the bcache device as SSD - which is wrong for some workloads, I
think especially desktop workloads and most server workloads.

Also, you may want to tune udev to correct some attributes so other
applications can do their detection and behavior correctly, too:

$ cat /etc/udev/rules.d/00-ssd-scheduler.rules
ACTION=="add|change", KERNEL=="bcache*", ATTR{queue/rotational}="1"
ACTION=="add|change", KERNEL=="sd[a-z]", ATTR{queue/rotational}=="0", 
ATTR{queue/iosched/slice_idle}="0"
ACTION=="add|change", KERNEL=="sd[a-z]", ATTR{queue/rotational}=="0", 
ATTR{queue/scheduler}="kyber"
ACTION=="add|change", KERNEL=="sd[a-z]", ATTR{queue/rotational}=="1", 
ATTR{queue/scheduler}="bfq"

Take note: on a non-mq system you may want to use noop/deadline/cfq
instead of kyber/bfq.


I'm running bcache since over two years now and the performance
improvement is very very high with boot times going down to 30-40s from
3+ minutes previously, faster app startup times (almost instantly like
on SSD), reduced noise by reduced head movements, etc. Also, it has
easy setup (no split metadata/data cache, you can attach more than one
device to a single cache), and it is rocksolid even when crashing the
system.

Bcache learns by using LRU for caching: What you don't need will be
pushed out of cache over time, what you use, stays. This is actually a
lot like "hot data caching". Given a big enough cache, everything of
your daily needs would stay in cache, easily achieving hit ratios
around 90%. Since sequential access is bypassed, you don't have to
worry to flush the cache with large copy operations.

My system uses a 512G SSD with 400G dedicated to bcache, attached to 3x
1TB HDD draid0 mraid1 btrfs, filled with 2TB of net data and daily
backups using borgbackup. Bcache runs in writeback mode, the backup
takes around 15 minutes each night to dig through all data and stores
it to an internal intermediate backup also on bcache (xfs, write-around
mode). Currently not implemented, this intermediate backup will later
be mirrored to external, off-site location.

Some of the rest of the SSD is EFI-ESP, some swap space, and
over-provisioned area to keep bcache performance high.

$ uptime && bcache-status
 21:28:44 up 3 days, 20:38,  3 users,  load average: 1,18, 1,44, 2,14
--- bcache ---
UUIDaacfbcd9-dae5-4377-92d1-6808831a4885
Block Size  4.00 KiB
Bucket Size 512.00 KiB
Congested?  False
Read Congestion 2.0ms
Write Congestion20.0ms
Total Cache Size400 GiB
Total Cache Used400 GiB (100%)
Total Cache Unused  0 B (0%)
Evictable Cache 396 GiB (99%)
Replacement Policy  [lru] fifo random
Cache Mode  (Various)
Total Hits  2364518 (89%)
Total Misses290764
Total Bypass Hits   4284468 (100%)
Total Bypass Misses 0
Total Bypassed  215 GiB


The bucket size and block size was chosen to best fit with Samsung TLC
arrangement. But this is pure theory, I never benchmarked the benefits.
I just feel more comfortable that way. ;-)


One should also keep in mind: The way how btrfs works cannot optimally
use bcache, as cow will obviously invalidate data in bcache - but
bcache doesn't have knowledge of this. Of course, such 

Re: Give up on bcache?

2017-09-26 Thread Roman Mamedov
On Tue, 26 Sep 2017 16:50:00 + (UTC)
Ferry Toth  wrote:

> https://www.phoronix.com/scan.php?page=article=linux414-bcache-
> raid=2
> 
> I think it might be idle hopes to think bcache can be used as a ssd cache 
> for btrfs to significantly improve performance..

My personal real-world experience shows that SSD caching -- with lvmcache --
does indeed significantly improve performance of a large Btrfs filesystem with
slowish base storage.

And that article, sadly, only demonstrates once again the general mediocre
quality of Phoronix content: it is an astonishing oversight to not check out
lvmcache in the same setup, to at least try to draw some useful conclusion, is
it Bcache that is strangely deficient, or SSD caching as a general concept
does not work well in the hardware setup utilized.

-- 
With respect,
Roman
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v3 1/2] btrfs: undo writable when sprouting fails

2017-09-26 Thread David Sterba
On Tue, Sep 26, 2017 at 08:57:47PM +0800, Qu Wenruo wrote:
> 
> 
> On 2017年09月26日 16:41, Anand Jain wrote:
> > When new device is being added to seed FS, seed FS is marked writable,
> > but when we fail to bring in the new device, we missed to undo the
> > writable part. This patch fixes it.
> > 
> > Signed-off-by: Anand Jain 
> > ---
> >   fs/btrfs/volumes.c | 2 ++
> >   1 file changed, 2 insertions(+)
> > 
> > diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
> > index 0e8f16c305df..9d64700cc9b6 100644
> > --- a/fs/btrfs/volumes.c
> > +++ b/fs/btrfs/volumes.c
> > @@ -2501,6 +2501,8 @@ int btrfs_init_new_device(struct btrfs_fs_info 
> > *fs_info, const char *device_path
> > return ret;
> >   
> >   error_trans:
> > +   if (seeding_dev)
> > +   sb->s_flags |= MS_RDONLY;
> 
> Still the same question.
> 
> ---
>   if (seeding_dev) {
>   mutex_unlock(_mutex);
>   up_write(>s_umount);
> 
>   if (ret) /* transaction commit */
>   return ret;
> 
>   ret = btrfs_relocate_sys_chunks(fs_info);
>   if (ret < 0)
>   btrfs_handle_fs_error(fs_info, ret,
>   "Failed to relocate sys chunks after device 
> initialization. This 
> can be fixed using the \"btrfs balance\" command.");
>   trans = btrfs_attach_transaction(root);
>   if (IS_ERR(trans)) {
>   if (PTR_ERR(trans) == -ENOENT)
>   return 0;
>   return PTR_ERR(trans);
>   }
> ---
> In the above btrfs_attch_transaction() error handler, which just 
> returned without going to error_trans tags, did we misseed the s_flag 
> revert thing?

I think so. Also I think "sb->s_flags |= MS_RDONLY;" can be moved to the
'if' after the error: label, where we already check the 'seeding_dev'
condition.
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 1/2] btrfs: Abort transaction if btrfs_update_root fails in btrfs_ioctl_subvol_setflags

2017-09-26 Thread David Sterba
On Tue, Sep 26, 2017 at 05:27:41PM +0300, Nikolay Borisov wrote:
> btrfs_update_root can fail for any number of reasons, however the only error
> handling we do is to revert the modified flags, yet we do not abort the
> transaction but proceed to commit it.

AFAICS btrfs_update_root aborts the transaction itself, so it's not
needed in the caller.

> Fix this by explicitly checking if the
> update root routine has failed and abort the transaction.
> 
> Fixes: 0caa102da827 ("Btrfs: Add BTRFS_IOC_SUBVOL_GETFLAGS/SETFLAGS ioctls")
> Signed-off-by: Nikolay Borisov 
> ---
>  fs/btrfs/ioctl.c | 3 +++
>  1 file changed, 3 insertions(+)
> 
> diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
> index d6715c2bcdc4..09fcd51f0e8c 100644
> --- a/fs/btrfs/ioctl.c
> +++ b/fs/btrfs/ioctl.c
> @@ -1842,8 +1842,11 @@ static noinline int btrfs_ioctl_subvol_setflags(struct 
> file *file,
>  
>   ret = btrfs_update_root(trans, fs_info->tree_root,
>   >root_key, >root_item);
> + if (ret < 0)
> + btrfs_abort_transaction(trans, ret);
>  
>   btrfs_commit_transaction(trans);

I think the error from commit_transaction should be returned as well if
we get here.

So:

ret = btrfs_update_root();

if (ret < 0) {
end_transaction();
goto out_reset;
}

ret = btrfs_commit_transaction();

out_reset:
/* and then as it is */
if (ret)
btrfs_set_root_flags(...);

etc.



> +
>  out_reset:
>   if (ret)
>   btrfs_set_root_flags(>root_item, root_flags);
> -- 
> 2.7.4
> 
> --
> To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
> the body of a message to majord...@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Give up on bcache?

2017-09-26 Thread Ferry Toth
Looking at the Phoronix benchmark here:

https://www.phoronix.com/scan.php?page=article=linux414-bcache-
raid=2

I think it might be idle hopes to think bcache can be used as a ssd cache 
for btrfs to significantly improve performance.. True, the benchmark is 
using ext.

But the most important one (where btrfs always shows to be a little slow) 
would be the SQLLite test. And with ext at least performance _degrades_ 
except for the Writeback mode, and even there is nowhere near what the 
SSD is capable of.

I think with btrfs it will be even worse and that it is a fundamental 
problem: caching is complex and the cache can not how how the data on the 
fs is used.

I think the original idea of hot data tracking has a much better chance 
to significantly improve performance. This of course as the SSD's and 
HDD's then will be equal citizens and btrfs itself gets to decide on 
which drive the data is best stored.

With this implemented right, it would also finally silence the never 
ending discussion why not btrfs and why zfs, ext, xfs etc. Which would be 
a plus by its own right.

--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: rm: it is dangerous to operate recursively on '/data/Backup/@' (same as '/')

2017-09-26 Thread Eric Wolf
So somehow my @ and @home subvolumes became mounted at /data/Backup in
addition to their normal locations (/ and /home). So when I used 'dd'
I was outputting to my OS drive instead of my data pool. How did this
happen? How do I undo it? I'll try restarting now, but I'll await
further responses before replying to myself again.
---
Eric Wolf
(201) 316-6098
19w...@gmail.com


On Tue, Sep 26, 2017 at 10:41 AM, Eric Wolf <19w...@gmail.com> wrote:
> I accidentally filled my OS drive with a copy of itself? The problem
> is, /data/ is a separate pool from the OS drive. And now it looks like
> I can't erase it? I don't even know how I got here. All I did was "dd
> if=/dev/sda of=/data/Backup/backup-new.img && mv
> /data/Backup/backup-new.img /data/Backup/backup.img" I don't really
> know where to go from here.
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v2 3/4] btrfs: Add sanity check for EXTENT_DATA when reading out leaf

2017-09-26 Thread David Sterba
On Tue, Sep 26, 2017 at 08:49:55PM +0800, Qu Wenruo wrote:
> > Yeah something like that. And also make it a function, unless we're
> > going to use some macro magic (I'm not sure about that yet).
> > 
> > Schematically:
> > 
> > btrfs_report_corruption(fs_info, EXTENT_ITEM, "file item type is unknown: 
> > %d", fi->type);
> > 
> > implemented as:
> > 
> > btrfs_err(fs_info,
> > "corruption detected for item %d: "
> > "file item type is unknown: %d",
> > EXTENT_ITEM
> > fi->type);
> > 
> > Here comes the magic: as btrfs_err will print the arguments on one line
> > and adds the \n, we have to merge the string and the argument list into
> > one.
> > 
> > But if adding a separate helper similar to btrfs_err would be more
> > suitable, then ok.
> > 
> I'll try to implement it, but I'm a little afraid that it may turn out 
> to little common routine for the error report and we may need to 
> manually craft the output for each member.
> 
> But I'll keep this point in mind when updating the patchset.

Ok, let's see how far we can make it. Please send only updates on top of
this patches for now, I've merged them to misc-next (ie. "for 4.15").

> BTW, what about moving these checkers and error reporting mechanism to 
> its own C file?
> The code will definitely get larger and larger, I think moving them to 
> one separate file at the very beginning will save us more time.

Good point, please proceed.
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


rm: it is dangerous to operate recursively on '/data/Backup/@' (same as '/')

2017-09-26 Thread Eric Wolf
I accidentally filled my OS drive with a copy of itself? The problem
is, /data/ is a separate pool from the OS drive. And now it looks like
I can't erase it? I don't even know how I got here. All I did was "dd
if=/dev/sda of=/data/Backup/backup-new.img && mv
/data/Backup/backup-new.img /data/Backup/backup.img" I don't really
know where to go from here.
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[RFC PATCH 2/2] btrfs: Remove received_uuid during received snapshot ro->rw switch

2017-09-26 Thread Nikolay Borisov
Currently when a read-only snapshot is received and subsequently its ro property
is set to false i.e. switched to rw-mode the received_uuid of that subvol 
remains
intact. However, once the received volume is switched to RW mode we cannot
guaranteee that it contains the same data, so it makes sense to remove the
received uuid.

Signed-off-by: Nikolay Borisov 
---

This patch has been inspired by the problem described in 
https://www.spinics.net/lists/linux-btrfs/msg68879.html

 fs/btrfs/ioctl.c | 37 -
 1 file changed, 28 insertions(+), 9 deletions(-)

diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 09fcd51f0e8c..71fd28caefdd 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -1811,6 +1811,17 @@ static noinline int btrfs_ioctl_subvol_setflags(struct 
file *file,
goto out_drop_sem;
 
root_flags = btrfs_root_flags(>root_item);
+
+   /*
+* 1 - root item
+* 1 - uuid item
+*/
+   trans = btrfs_start_transaction(root, 2);
+   if (IS_ERR(trans)) {
+   ret = PTR_ERR(trans);
+   goto out_drop_sem;
+   }
+
if (flags & BTRFS_SUBVOL_RDONLY) {
btrfs_set_root_flags(>root_item,
 root_flags | BTRFS_ROOT_SUBVOL_RDONLY);
@@ -1824,30 +1835,38 @@ static noinline int btrfs_ioctl_subvol_setflags(struct 
file *file,
btrfs_set_root_flags(>root_item,
 root_flags & ~BTRFS_ROOT_SUBVOL_RDONLY);
spin_unlock(>root_item_lock);
+   if 
(!btrfs_is_empty_uuid(root->root_item.received_uuid)) {
+   ret = btrfs_uuid_tree_rem(trans, fs_info,
+  root->root_item.received_uuid,
+  BTRFS_UUID_KEY_RECEIVED_SUBVOL,
+  root->root_key.objectid);
+
+   if (ret && ret != -ENOENT) {
+   btrfs_abort_transaction(trans, ret);
+   goto out_end_trans;
+   }
+
+   memset(root->root_item.received_uuid, 0,
+  BTRFS_UUID_SIZE);
+   }
} else {
spin_unlock(>root_item_lock);
btrfs_warn(fs_info,
   "Attempt to set subvolume %llu read-write 
during send",
   root->root_key.objectid);
ret = -EPERM;
-   goto out_drop_sem;
+   btrfs_abort_transaction(trans, ret);
+   goto out_end_trans;
}
}
 
-   trans = btrfs_start_transaction(root, 1);
-   if (IS_ERR(trans)) {
-   ret = PTR_ERR(trans);
-   goto out_reset;
-   }
-
ret = btrfs_update_root(trans, fs_info->tree_root,
>root_key, >root_item);
if (ret < 0)
btrfs_abort_transaction(trans, ret);
 
+out_end_trans:
btrfs_commit_transaction(trans);
-
-out_reset:
if (ret)
btrfs_set_root_flags(>root_item, root_flags);
 out_drop_sem:
-- 
2.7.4

--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 1/2] btrfs: Abort transaction if btrfs_update_root fails in btrfs_ioctl_subvol_setflags

2017-09-26 Thread Nikolay Borisov
btrfs_update_root can fail for any number of reasons, however the only error
handling we do is to revert the modified flags, yet we do not abort the
transaction but proceed to commit it. Fix this by explicitly checking if the
update root routine has failed and abort the transaction.

Fixes: 0caa102da827 ("Btrfs: Add BTRFS_IOC_SUBVOL_GETFLAGS/SETFLAGS ioctls")
Signed-off-by: Nikolay Borisov 
---
 fs/btrfs/ioctl.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index d6715c2bcdc4..09fcd51f0e8c 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -1842,8 +1842,11 @@ static noinline int btrfs_ioctl_subvol_setflags(struct 
file *file,
 
ret = btrfs_update_root(trans, fs_info->tree_root,
>root_key, >root_item);
+   if (ret < 0)
+   btrfs_abort_transaction(trans, ret);
 
btrfs_commit_transaction(trans);
+
 out_reset:
if (ret)
btrfs_set_root_flags(>root_item, root_flags);
-- 
2.7.4

--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 0/4] btrfs-progs: subvol: fix del --commit-after

2017-09-26 Thread Qu Wenruo



On 2017年09月26日 13:44, Misono, Tomohiro wrote:

Fix subvol del --commit-after to work properly:
  - SYNC ioctl will be issued even when last delete is failed
  - SYNC ioctl will be issued on each file system only once in the end

To achieve this, each deleted subvol's (parent's) fsid is checked each
time. If the fsid is seen for the first time, its fd will be kept in order
to issue SYNC ioctl in the end.

There already exists get_fsid() in cmds-property.c and seen_fsid which
keeps fsid in hush function in cmds-filesystem.c. First three patches
make them to common functions and last one is the main part.


Much more elegant than my expectation.
Clean and short fix.

Only small suggestion to use fd list other than using the last valid fd 
(for patch 3), and a possible patch split (for patch 4).


Looks good overall.
You can add my reviewed-by tag after splitting cleanup patch.

Good job.

Thanks,
Qu


Tomohiro Misono (4):
   btrfs-progs: move get_fsid() to util.c
   btrfs-progs: move seen_fsid to util.c
   btrfs-progs: change seen_fsid to hold fd and DIR*
   btrfs-progs: subvol: fix subvol del --commit-after

  cmds-filesystem.c |  88 +++--
  cmds-property.c   |  30 
  cmds-subvolume.c  |  77 ---
  utils.c   | 105 ++
  utils.h   |  15 
  5 files changed, 181 insertions(+), 134 deletions(-)


--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 4/4] btrfs-progs: subvol: fix subvol del --commit-after

2017-09-26 Thread Qu Wenruo



On 2017年09月26日 13:46, Misono, Tomohiro wrote:

Fix subvol del --commit-after to work properly:
  - SYNC ioctl will be issued even when last delete is failed
  - SYNC ioctl will be issued on each file system only once in the end

To achieve this, get_fsid() and add_seen_fsid() is called after each delete
to keep only one fd for each fs.

In the end, seen_fsid_hash will be traversed and SYNC is issued on each fs.

Signed-off-by: Tomohiro Misono 
---
  cmds-subvolume.c | 77 
  1 file changed, 56 insertions(+), 21 deletions(-)

diff --git a/cmds-subvolume.c b/cmds-subvolume.c
index 666f6e0..bcbd737 100644
--- a/cmds-subvolume.c
+++ b/cmds-subvolume.c
@@ -263,6 +263,10 @@ static int cmd_subvol_delete(int argc, char **argv)
DIR *dirstream = NULL;
int verbose = 0;
int commit_mode = 0;
+   u8 fsid[BTRFS_FSID_SIZE];
+   char uuidbuf[BTRFS_UUID_UNPARSED_SIZE];
+   struct seen_fsid *seen_fsid_hash[SEEN_FSID_HASH_SIZE] = {NULL,};
+   enum { COMMIT_AFTER = 1, COMMIT_EACH = 2 };
  
  	while (1) {

int c;
@@ -279,10 +283,10 @@ static int cmd_subvol_delete(int argc, char **argv)
  
  		switch(c) {

case 'c':
-   commit_mode = 1;
+   commit_mode = COMMIT_AFTER;
break;
case 'C':
-   commit_mode = 2;
+   commit_mode = COMMIT_EACH;


The commit_mode cleanup can be a separate patch.
(More patches always look cooler)

Other part looks good enough.
Feel free to add my reviewed by tags after splitting the cleanup patch.

Reviewed-by: Qu Wenruo 

Thanks,
Qu

break;
case 'v':
verbose++;
@@ -298,7 +302,7 @@ static int cmd_subvol_delete(int argc, char **argv)
if (verbose > 0) {
printf("Transaction commit: %s\n",
!commit_mode ? "none (default)" :
-   commit_mode == 1 ? "at the end" : "after each");
+   commit_mode == COMMIT_AFTER ? "at the end" : "after 
each");
}
  
  	cnt = optind;

@@ -338,50 +342,81 @@ again:
}
  
  	printf("Delete subvolume (%s): '%s/%s'\n",

-   commit_mode == 2 || (commit_mode == 1 && cnt + 1 == argc)
-   ? "commit" : "no-commit", dname, vname);
+   commit_mode == COMMIT_EACH ? "commit" : "no-commit", dname, 
vname);
memset(, 0, sizeof(args));
strncpy_null(args.name, vname);
res = ioctl(fd, BTRFS_IOC_SNAP_DESTROY, );
-   if(res < 0 ){
+   if (res < 0) {
error("cannot delete '%s/%s': %s", dname, vname,
strerror(errno));
ret = 1;
goto out;
}
  
-	if (commit_mode == 1) {

+   if (commit_mode == COMMIT_EACH) {
res = wait_for_commit(fd);
if (res < 0) {
-   error("unable to wait for commit after '%s': %s",
+   error("unable to wait for commit after deleting '%s': 
%s",
path, strerror(errno));
ret = 1;
}
+   } else if (commit_mode == COMMIT_AFTER) {
+   res = get_fsid(dname, fsid, 0);
+   if (res < 0) {
+   error("unable to get fsid for '%s': %s", path, 
strerror(-res));
+   error("delete suceeded but commit may not be done in the 
end");
+   ret = 1;
+   goto out;
+   }
+
+   if (add_seen_fsid(fsid, seen_fsid_hash, fd, dirstream) == 0) {
+   if (verbose > 0) {
+   uuid_unparse(fsid, uuidbuf);
+   printf("  new fs is found for '%s', fsid: 
%s\n", path, uuidbuf);
+   }
+   // this is the first time a subvolume on this 
filesystem is deleted
+   // keep fd in order to issue SYNC ioctl in the end
+   goto keep_fd;
+   }
}
  
  out:

+   close_file_or_dir(fd, dirstream);
+keep_fd:
+   fd = -1;
+   dirstream = NULL;
free(dupdname);
free(dupvname);
dupdname = NULL;
dupvname = NULL;
cnt++;
-   if (cnt < argc) {
-   close_file_or_dir(fd, dirstream);
-   /* avoid double free */
-   fd = -1;
-   dirstream = NULL;
+   if (cnt < argc)
goto again;
-   }
  
-	if (commit_mode == 2 && fd != -1) {

-   res = wait_for_commit(fd);
-   if (res < 0) {
-   error("unable to do final sync after deletion: %s",
-   strerror(errno));
-   ret = 1;
+   if 

Re: [PATCH 3/4] btrfs-progs: change seen_fsid to hold fd and DIR*

2017-09-26 Thread Qu Wenruo



On 2017年09月26日 13:45, Misono, Tomohiro wrote:

Change seen_fsid to hold fd and DIR* in order to keep access to each fs.
This will be used for 'subvol delete --commit-after'.


It is already quite good, good enough for the fix.

However just a small point can be further enhanced, commended below.



Signed-off-by: Tomohiro Misono 
---
  cmds-filesystem.c | 4 ++--
  utils.c   | 6 +-
  utils.h   | 5 -
  3 files changed, 11 insertions(+), 4 deletions(-)

diff --git a/cmds-filesystem.c b/cmds-filesystem.c
index c7dae40..4bbff43 100644
--- a/cmds-filesystem.c
+++ b/cmds-filesystem.c
@@ -277,7 +277,7 @@ static void print_one_uuid(struct btrfs_fs_devices 
*fs_devices,
u64 devs_found = 0;
u64 total;
  
-	if (add_seen_fsid(fs_devices->fsid, seen_fsid_hash))

+   if (add_seen_fsid(fs_devices->fsid, seen_fsid_hash, -1, NULL))
return;
  
  	uuid_unparse(fs_devices->fsid, uuidbuf);

@@ -324,7 +324,7 @@ static int print_one_fs(struct btrfs_ioctl_fs_info_args 
*fs_info,
struct btrfs_ioctl_dev_info_args *tmp_dev_info;
int ret;
  
-	ret = add_seen_fsid(fs_info->fsid, seen_fsid_hash);

+   ret = add_seen_fsid(fs_info->fsid, seen_fsid_hash, -1, NULL);
if (ret == -EEXIST)
return 0;
else if (ret)
diff --git a/utils.c b/utils.c
index f91d41e..bdfbfe0 100644
--- a/utils.c
+++ b/utils.c
@@ -1804,7 +1804,8 @@ int is_seen_fsid(u8 *fsid, struct seen_fsid 
*seen_fsid_hash[])
return 0;
  }
  
-int add_seen_fsid(u8 *fsid, struct seen_fsid *seen_fsid_hash[])

+int add_seen_fsid(u8 *fsid, struct seen_fsid *seen_fsid_hash[],
+   int fd, DIR *dirstream)
  {
u8 hash = fsid[0];
int slot = hash % SEEN_FSID_HASH_SIZE;
@@ -1832,6 +1833,8 @@ insert:
  
  	alloc->next = NULL;

memcpy(alloc->fsid, fsid, BTRFS_FSID_SIZE);
+   alloc->fd = fd;
+   alloc->dirstream = dirstream;
  
  	if (seen)

seen->next = alloc;
@@ -1851,6 +1854,7 @@ void free_seen_fsid(struct seen_fsid *seen_fsid_hash[])
seen = seen_fsid_hash[slot];
while (seen) {
next = seen->next;
+   close_file_or_dir(seen->fd, seen->dirstream);
free(seen);
seen = next;
}
diff --git a/utils.h b/utils.h
index da34e6c..bac7688 100644
--- a/utils.h
+++ b/utils.h
@@ -107,9 +107,12 @@ int get_fsid(const char *path, u8 *fsid, int silent);
  struct seen_fsid {
u8 fsid[BTRFS_FSID_SIZE];
struct seen_fsid *next;
+   int fd;


Will it be possible that the final fd recorded here is invalid or some 
other reason that we failed to execute SYNC ioctl on that fd, but can 
succeeded with other fd?


In that case, a list of fd will help.

Thanks,
Qu


+   DIR *dirstream;
  };
  int is_seen_fsid(u8 *fsid, struct seen_fsid *seen_fsid_hash[]);
-int add_seen_fsid(u8 *fsid, struct seen_fsid *seen_fsid_hash[]);
+int add_seen_fsid(u8 *fsid, struct seen_fsid *seen_fsid_hash[],
+   int fd, DIR *dirstream);
  void free_seen_fsid(struct seen_fsid *seen_fsid_hash[]);
  
  int get_label(const char *btrfs_dev, char *label);



--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v3 2/2] btrfs: fix BUG_ON in btrfs_init_new_device()

2017-09-26 Thread Qu Wenruo



On 2017年09月26日 16:41, Anand Jain wrote:

Instead of BUG_ON return error to the caller. And handle the fail
condition by calling the abort transaction and going through the
error path.

Signed-off-by: Anand Jain 


Reviewed-by: Qu Wenruo 

Thanks,
Qu


---
  fs/btrfs/volumes.c | 14 +-
  1 file changed, 9 insertions(+), 5 deletions(-)

diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 9d64700cc9b6..4cb575fbf643 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -2399,7 +2399,10 @@ int btrfs_init_new_device(struct btrfs_fs_info *fs_info, 
const char *device_path
if (seeding_dev) {
sb->s_flags &= ~MS_RDONLY;
ret = btrfs_prepare_sprout(fs_info);
-   BUG_ON(ret); /* -ENOMEM */
+   if (ret) {
+   btrfs_abort_transaction(trans, ret);
+   goto error_trans;
+   }
}
  
  	device->fs_devices = fs_info->fs_devices;

@@ -2445,14 +2448,14 @@ int btrfs_init_new_device(struct btrfs_fs_info 
*fs_info, const char *device_path
mutex_unlock(_info->chunk_mutex);
if (ret) {
btrfs_abort_transaction(trans, ret);
-   goto error_trans;
+   goto error_sysfs;
}
}
  
  	ret = btrfs_add_device(trans, fs_info, device);

if (ret) {
btrfs_abort_transaction(trans, ret);
-   goto error_trans;
+   goto error_sysfs;
}
  
  	if (seeding_dev) {

@@ -2461,7 +2464,7 @@ int btrfs_init_new_device(struct btrfs_fs_info *fs_info, 
const char *device_path
ret = btrfs_finish_sprout(trans, fs_info);
if (ret) {
btrfs_abort_transaction(trans, ret);
-   goto error_trans;
+   goto error_sysfs;
}
  
  		/* Sprouting would change fsid of the mounted root,

@@ -2500,12 +2503,13 @@ int btrfs_init_new_device(struct btrfs_fs_info 
*fs_info, const char *device_path
update_dev_time(device_path);
return ret;
  
+error_sysfs:

+   btrfs_sysfs_rm_device_link(fs_info->fs_devices, device);
  error_trans:
if (seeding_dev)
sb->s_flags |= MS_RDONLY;
btrfs_end_transaction(trans);
rcu_string_free(device->name);
-   btrfs_sysfs_rm_device_link(fs_info->fs_devices, device);
kfree(device);
  error:
blkdev_put(bdev, FMODE_EXCL);


--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v3 1/2] btrfs: undo writable when sprouting fails

2017-09-26 Thread Qu Wenruo



On 2017年09月26日 16:41, Anand Jain wrote:

When new device is being added to seed FS, seed FS is marked writable,
but when we fail to bring in the new device, we missed to undo the
writable part. This patch fixes it.

Signed-off-by: Anand Jain 
---
  fs/btrfs/volumes.c | 2 ++
  1 file changed, 2 insertions(+)

diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 0e8f16c305df..9d64700cc9b6 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -2501,6 +2501,8 @@ int btrfs_init_new_device(struct btrfs_fs_info *fs_info, 
const char *device_path
return ret;
  
  error_trans:

+   if (seeding_dev)
+   sb->s_flags |= MS_RDONLY;


Still the same question.

---
if (seeding_dev) {
mutex_unlock(_mutex);
up_write(>s_umount);

if (ret) /* transaction commit */
return ret;

ret = btrfs_relocate_sys_chunks(fs_info);
if (ret < 0)
btrfs_handle_fs_error(fs_info, ret,
"Failed to relocate sys chunks after device initialization. This 
can be fixed using the \"btrfs balance\" command.");

trans = btrfs_attach_transaction(root);
if (IS_ERR(trans)) {
if (PTR_ERR(trans) == -ENOENT)
return 0;
return PTR_ERR(trans);
}
---
In the above btrfs_attch_transaction() error handler, which just 
returned without going to error_trans tags, did we misseed the s_flag 
revert thing?


Thanks,
Qu


btrfs_end_transaction(trans);
rcu_string_free(device->name);
btrfs_sysfs_rm_device_link(fs_info->fs_devices, device);


--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v2 3/4] btrfs: Add sanity check for EXTENT_DATA when reading out leaf

2017-09-26 Thread Qu Wenruo



On 2017年09月26日 20:13, David Sterba wrote:

On Tue, Sep 26, 2017 at 08:46:29AM +0800, Qu Wenruo wrote:

On 2017年09月26日 08:28, Qu Wenruo wrote:

On 2017年09月25日 23:45, David Sterba wrote:

On Wed, Aug 23, 2017 at 04:57:58PM +0900, Qu Wenruo wrote:

Add extra checker for item with EXTENT_DATA type.
This checks the following thing:
0) Key offset
     All key offset must be aligned to sectorsize.
     Inline extent must have 0 for key offset.

1) Item size
     Plain text inline file extent size must match item size.


'plain text' seems to be a bit misleading, I don't think we've ever
referred to uncompressed extent as such, although it makes some sense. I
think 'uncompressed' would work too.


I'll use 'uncompressed' instead.




     (compressed inline file extent has no info about its on-disk size)
     Regular/preallocated file extent size must be a fixed value.

2) Every member of regular file extent item
     Including alignment for bytenr and offset, possible value for
     compression/encryption/type.

3) Type/compression/encode must be one of the valid values.

This should be the most comprehensive and restrict check in the context
of btrfs_item for EXTENT_DATA.

Signed-off-by: Qu Wenruo 
---
   fs/btrfs/disk-io.c  | 108

   include/uapi/linux/btrfs_tree.h |   1 +
   2 files changed, 109 insertions(+)

diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index e034d08bd036..b92296c6a698 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -549,6 +549,103 @@ static int check_tree_block_fsid(struct
btrfs_fs_info *fs_info,
  btrfs_header_level(eb) == 0 ? "leaf" : "node",    \
  reason, btrfs_header_bytenr(eb), root->objectid, slot)
+static int check_extent_data_item(struct btrfs_root *root,
+  struct extent_buffer *leaf,
+  struct btrfs_key *key, int slot)
+{
+    struct btrfs_file_extent_item *fi;
+    u32 sectorsize = root->fs_info->sectorsize;
+    u32 item_size = btrfs_item_size_nr(leaf, slot);
+
+    if (!IS_ALIGNED(key->offset, sectorsize)) {
+    CORRUPT("unaligned key offset for file extent",


The CORRUPT macro does not print any details beyond what it gets from
the parameters, so here we'd like to know which extent it is and what's
the size. The sectorsize can be found elsewhere so it does not need
to be printed.


Did you mean, the corrupt can be enhanced just like btrfs_printk() to
have custom format and args list?


Yeah something like that. And also make it a function, unless we're
going to use some macro magic (I'm not sure about that yet).

Schematically:

btrfs_report_corruption(fs_info, EXTENT_ITEM, "file item type is unknown: %d", 
fi->type);

implemented as:

btrfs_err(fs_info,
"corruption detected for item %d: "
"file item type is unknown: %d",
EXTENT_ITEM
fi->type);

Here comes the magic: as btrfs_err will print the arguments on one line
and adds the \n, we have to merge the string and the argument list into
one.

But if adding a separate helper similar to btrfs_err would be more
suitable, then ok.

I'll try to implement it, but I'm a little afraid that it may turn out 
to little common routine for the error report and we may need to 
manually craft the output for each member.


But I'll keep this point in mind when updating the patchset.


BTW, what about moving these checkers and error reporting mechanism to 
its own C file?
The code will definitely get larger and larger, I think moving them to 
one separate file at the very beginning will save us more time.


Thanks,
Qu
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v2 3/4] btrfs: Add sanity check for EXTENT_DATA when reading out leaf

2017-09-26 Thread Qu Wenruo



On 2017年09月26日 20:05, David Sterba wrote:

On Tue, Sep 26, 2017 at 08:28:25AM +0800, Qu Wenruo wrote:



On 2017年09月25日 23:45, David Sterba wrote:

On Wed, Aug 23, 2017 at 04:57:58PM +0900, Qu Wenruo wrote:

Add extra checker for item with EXTENT_DATA type.
This checks the following thing:
0) Key offset
 All key offset must be aligned to sectorsize.
 Inline extent must have 0 for key offset.

1) Item size
 Plain text inline file extent size must match item size.


'plain text' seems to be a bit misleading, I don't think we've ever
referred to uncompressed extent as such, although it makes some sense. I
think 'uncompressed' would work too.


I'll use 'uncompressed' instead.


I've applied an fixed that in the changelog, as there were some other
changes needed due to other patch removing the BTRFS_COMPRESSION_LAST.


Checked the commit 0826e7faa895f5463e4790082392cdaaff98d8d8, which uses 
BTRFS_FILE_EXTENT_TYPES and doesn't increase but using the last value.


Looks very good to me.




 (compressed inline file extent has no info about its on-disk size)
 Regular/preallocated file extent size must be a fixed value.

2) Every member of regular file extent item
 Including alignment for bytenr and offset, possible value for
 compression/encryption/type.

3) Type/compression/encode must be one of the valid values.

This should be the most comprehensive and restrict check in the context
of btrfs_item for EXTENT_DATA.

Signed-off-by: Qu Wenruo 
---
   fs/btrfs/disk-io.c  | 108 

   include/uapi/linux/btrfs_tree.h |   1 +
   2 files changed, 109 insertions(+)

diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index e034d08bd036..b92296c6a698 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -549,6 +549,103 @@ static int check_tree_block_fsid(struct btrfs_fs_info 
*fs_info,
   btrfs_header_level(eb) == 0 ? "leaf" : "node",   \
   reason, btrfs_header_bytenr(eb), root->objectid, slot)
   
+static int check_extent_data_item(struct btrfs_root *root,

+ struct extent_buffer *leaf,
+ struct btrfs_key *key, int slot)
+{
+   struct btrfs_file_extent_item *fi;
+   u32 sectorsize = root->fs_info->sectorsize;
+   u32 item_size = btrfs_item_size_nr(leaf, slot);
+
+   if (!IS_ALIGNED(key->offset, sectorsize)) {
+   CORRUPT("unaligned key offset for file extent",


The CORRUPT macro does not print any details beyond what it gets from
the parameters, so here we'd like to know which extent it is and what's
the size. The sectorsize can be found elsewhere so it does not need
to be printed.


Did you mean despite bytenr of the tree block and root objectid, we
should output more info about the key?


Possibly yes, but rather more about the item. If the key objectid/offset
are eg. some structural value like the extent offset etc.


Understood.

So in short, the reporter should do:
1) Report the wrong value
2) Report expected value (or value range)
3) Using meaningful name other than key values
4) Report extra meaningful values if they passed their checker
5) Checker order must follow member order

And following above principle, using wrong file_extent_type as example,
it should report like:
---
root=512 ino=768 file_offset=4096 invalid file_extent_type, have 0x4, 
expected range [0, 2]

---

What about this principle?

(Although I think it's a little long, especially when extra fs_uuid 
appended)





Please note that, this condition is only for regular/prealloc file
extent items, so ram_bytes should be sectorsize aligned.


I can't find the tree dump and code confirms that the value should be
aligned.


Strange.

I just did a 6K write with compression, it produced such dump-tree result:
---
item 4 key (257 INODE_ITEM 0) itemoff 15881 itemsize 160
generation 7 transid 7 size 6144 nbytes 8192 <<<
block group 0 mode 100600 links 1 uid 0 gid 0 rdev 0
sequence 0 flags 0x2(none)

item 5 key (257 INODE_REF 256) itemoff 15866 itemsize 15
index 2 namelen 5 name: file1
item 6 key (257 EXTENT_DATA 0) itemoff 15813 itemsize 53
generation 7 type 1 (regular)
extent data disk byte 12845056 nr 4096 <<<
extent data offset 0 nr 8192 ram 8192 <<<
extent compression 1 (zlib)
---

Since the compression is all page based, I didn't think the ram_bytes 
can be page unaligned.


Anyway, I can remove the ram_bytes check until we get a better 
understanding of its definition.

(And better record it in wiki for later developers)

All these checker must be fully reviewed and get agreement from all 
reviewers.

Or it will cause tons of error report from end users.
So I'm pretty OK to delay the merge 1 or 2 cycles.



And if above error report principles are OK 

Re: btrfs scrub crashes OS

2017-09-26 Thread Qu Wenruo



On 2017年09月26日 19:50, Lukas Pirl wrote:

On 09/26/2017 11:36 AM, Qu Wenruo wrote as excerpted:

This is strange, this means that we can't find a chunk map for a 72K
length data extent.

Either the new mapper code has some bug, or it's a big problem.
But I think it's more possible for former case.

Would you please try to dump the chunk tree (which should be quite
small) using the following command?

$ btrfs inspect-internal dump-tree -t chunk 


Sure, happy to provide that:
   https://static.lukas-pirl.de/dump-chunk-tree.txt
(too large for Pastebin, file will probably go away in a couple of weeks).



Found the needed info.

Your original data extent range is [644337258496, +72K].
Which is just in the range of the following 2 chunks:

---
	item 5 key (FIRST_CHUNK_TREE CHUNK_ITEM 643200712704) itemoff 15611 
itemsize 112

length 1073741824 owner 2 stripe_len 65536 type DATA|RAID1


	item 6 key (FIRST_CHUNK_TREE CHUNK_ITEM 645348196352) itemoff 15499 
itemsize 112

length 1073741824 owner 2 stripe_len 65536 type DATA|RAID1
io_align 65536 io_width 65536 sector_size 4096

---
Those two chunks are covering the ranges of:
[643200712704, +1G)
[645348196352, +1G)

And no other chunk covers the hole between them.
But your original data extent range is in that hole.

So offline scrub output that error messages.
At least the chunk mapping code is correct.

Maybe something is wrong in the extent tree.
But anyway, it shouldn't cause too much trouble in offline scrub as you 
can see, it's a user space program and handles the problem quite well.

(Outputs error message and continue, without panic out)

So reading all your disk may still be needed to wipe out or confirm the 
possibility of the disk IO routine.


Thanks,
Qu


Cheers,

Lukas


--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v2 3/4] btrfs: Add sanity check for EXTENT_DATA when reading out leaf

2017-09-26 Thread David Sterba
On Tue, Sep 26, 2017 at 08:46:29AM +0800, Qu Wenruo wrote:
> On 2017年09月26日 08:28, Qu Wenruo wrote:
> > On 2017年09月25日 23:45, David Sterba wrote:
> >> On Wed, Aug 23, 2017 at 04:57:58PM +0900, Qu Wenruo wrote:
> >>> Add extra checker for item with EXTENT_DATA type.
> >>> This checks the following thing:
> >>> 0) Key offset
> >>>     All key offset must be aligned to sectorsize.
> >>>     Inline extent must have 0 for key offset.
> >>>
> >>> 1) Item size
> >>>     Plain text inline file extent size must match item size.
> >>
> >> 'plain text' seems to be a bit misleading, I don't think we've ever
> >> referred to uncompressed extent as such, although it makes some sense. I
> >> think 'uncompressed' would work too.
> > 
> > I'll use 'uncompressed' instead.
> > 
> >>
> >>>     (compressed inline file extent has no info about its on-disk size)
> >>>     Regular/preallocated file extent size must be a fixed value.
> >>>
> >>> 2) Every member of regular file extent item
> >>>     Including alignment for bytenr and offset, possible value for
> >>>     compression/encryption/type.
> >>>
> >>> 3) Type/compression/encode must be one of the valid values.
> >>>
> >>> This should be the most comprehensive and restrict check in the context
> >>> of btrfs_item for EXTENT_DATA.
> >>>
> >>> Signed-off-by: Qu Wenruo 
> >>> ---
> >>>   fs/btrfs/disk-io.c  | 108 
> >>> 
> >>>   include/uapi/linux/btrfs_tree.h |   1 +
> >>>   2 files changed, 109 insertions(+)
> >>>
> >>> diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
> >>> index e034d08bd036..b92296c6a698 100644
> >>> --- a/fs/btrfs/disk-io.c
> >>> +++ b/fs/btrfs/disk-io.c
> >>> @@ -549,6 +549,103 @@ static int check_tree_block_fsid(struct 
> >>> btrfs_fs_info *fs_info,
> >>>  btrfs_header_level(eb) == 0 ? "leaf" : "node",    \
> >>>  reason, btrfs_header_bytenr(eb), root->objectid, slot)
> >>> +static int check_extent_data_item(struct btrfs_root *root,
> >>> +  struct extent_buffer *leaf,
> >>> +  struct btrfs_key *key, int slot)
> >>> +{
> >>> +    struct btrfs_file_extent_item *fi;
> >>> +    u32 sectorsize = root->fs_info->sectorsize;
> >>> +    u32 item_size = btrfs_item_size_nr(leaf, slot);
> >>> +
> >>> +    if (!IS_ALIGNED(key->offset, sectorsize)) {
> >>> +    CORRUPT("unaligned key offset for file extent",
> >>
> >> The CORRUPT macro does not print any details beyond what it gets from
> >> the parameters, so here we'd like to know which extent it is and what's
> >> the size. The sectorsize can be found elsewhere so it does not need
> >> to be printed.
> 
> Did you mean, the corrupt can be enhanced just like btrfs_printk() to 
> have custom format and args list?

Yeah something like that. And also make it a function, unless we're
going to use some macro magic (I'm not sure about that yet).

Schematically:

btrfs_report_corruption(fs_info, EXTENT_ITEM, "file item type is unknown: %d", 
fi->type);

implemented as:

btrfs_err(fs_info,
"corruption detected for item %d: "
"file item type is unknown: %d",
EXTENT_ITEM
fi->type);

Here comes the magic: as btrfs_err will print the arguments on one line
and adds the \n, we have to merge the string and the argument list into
one.

But if adding a separate helper similar to btrfs_err would be more
suitable, then ok.
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v3 0/2] fix bug in btrfs_init_new_device()

2017-09-26 Thread Nikolay Borisov


On 26.09.2017 11:47, Anand Jain wrote:
> 1/2 fixes a bug which failed to reset writable when sprouting failed
> 2/2 fixes BUG_ON in btrfs_init_new_device()
> 
> Anand Jain (2):
>   btrfs: undo writable when sprouting fails
>   btrfs: fix BUG_ON in btrfs_init_new_device()

Reviewed-by: Nikolay Borisov 

> 
>  fs/btrfs/volumes.c | 16 +++-
>  1 file changed, 11 insertions(+), 5 deletions(-)
> 
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v2 3/4] btrfs: Add sanity check for EXTENT_DATA when reading out leaf

2017-09-26 Thread David Sterba
On Tue, Sep 26, 2017 at 08:28:25AM +0800, Qu Wenruo wrote:
> 
> 
> On 2017年09月25日 23:45, David Sterba wrote:
> > On Wed, Aug 23, 2017 at 04:57:58PM +0900, Qu Wenruo wrote:
> >> Add extra checker for item with EXTENT_DATA type.
> >> This checks the following thing:
> >> 0) Key offset
> >> All key offset must be aligned to sectorsize.
> >> Inline extent must have 0 for key offset.
> >>
> >> 1) Item size
> >> Plain text inline file extent size must match item size.
> > 
> > 'plain text' seems to be a bit misleading, I don't think we've ever
> > referred to uncompressed extent as such, although it makes some sense. I
> > think 'uncompressed' would work too.
> 
> I'll use 'uncompressed' instead.

I've applied an fixed that in the changelog, as there were some other
changes needed due to other patch removing the BTRFS_COMPRESSION_LAST.

> >> (compressed inline file extent has no info about its on-disk size)
> >> Regular/preallocated file extent size must be a fixed value.
> >>
> >> 2) Every member of regular file extent item
> >> Including alignment for bytenr and offset, possible value for
> >> compression/encryption/type.
> >>
> >> 3) Type/compression/encode must be one of the valid values.
> >>
> >> This should be the most comprehensive and restrict check in the context
> >> of btrfs_item for EXTENT_DATA.
> >>
> >> Signed-off-by: Qu Wenruo 
> >> ---
> >>   fs/btrfs/disk-io.c  | 108 
> >> 
> >>   include/uapi/linux/btrfs_tree.h |   1 +
> >>   2 files changed, 109 insertions(+)
> >>
> >> diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
> >> index e034d08bd036..b92296c6a698 100644
> >> --- a/fs/btrfs/disk-io.c
> >> +++ b/fs/btrfs/disk-io.c
> >> @@ -549,6 +549,103 @@ static int check_tree_block_fsid(struct 
> >> btrfs_fs_info *fs_info,
> >>   btrfs_header_level(eb) == 0 ? "leaf" : "node",   \
> >>   reason, btrfs_header_bytenr(eb), root->objectid, slot)
> >>   
> >> +static int check_extent_data_item(struct btrfs_root *root,
> >> +struct extent_buffer *leaf,
> >> +struct btrfs_key *key, int slot)
> >> +{
> >> +  struct btrfs_file_extent_item *fi;
> >> +  u32 sectorsize = root->fs_info->sectorsize;
> >> +  u32 item_size = btrfs_item_size_nr(leaf, slot);
> >> +
> >> +  if (!IS_ALIGNED(key->offset, sectorsize)) {
> >> +  CORRUPT("unaligned key offset for file extent",
> > 
> > The CORRUPT macro does not print any details beyond what it gets from
> > the parameters, so here we'd like to know which extent it is and what's
> > the size. The sectorsize can be found elsewhere so it does not need
> > to be printed.
> 
> Did you mean despite bytenr of the tree block and root objectid, we 
> should output more info about the key?

Possibly yes, but rather more about the item. If the key objectid/offset
are eg. some structural value like the extent offset etc.

> By "size" did you mean the disk_num_bytes of the extent?

For example, yes. Once the generic checks are ok.

> But since the offset is already invalid, I don't think we can trust any 
> data from that structure, so what we can output is only about the key.
> 
> I can enhance the output to output the key, but I'm afraid I can't 
> output anything more than that.

The idea is to print what seems to be reasonable for futher analyzing
the corruption or debugging. Sometimes the raw numbers from error
messages can tell if it's a off-by-one or whether it's completely off
the scale.

Obviously we can dereference only data that have been found valid and
the items provide limited number of information, so yes there might be
actually almost nothing useful to print.

So this depends. Looking at check_extent_data_item, the disk_num_bytes
alignment checks cannot be moved before the item type checks, but still
the unaligned value can be printed in the message.

> 
> > 
> >> +  leaf, root, slot);
> >> +  return -EUCLEAN;
> >> +  }
> >> +
> >> +  fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
> >> +
> >> +  if (btrfs_file_extent_type(leaf, fi) >= BTRFS_FILE_EXTENT_LAST_TYPE) {
> >> +  CORRUPT("invalid file extent type", leaf, root, slot);
> > 
> > "invalid file extent type %d"
> > 
> > and actually, we could add some item-specific helpers to report the
> > corruption so if it's for an extent, print the generic extent
> > information, plus an additional information what exactly was wrong.
> 
> For additional info I'm completely fine.
> 
> But I'm not sure if it's good to output more info about the item.
> 
> The biggest problem is, when any member of the item can't pass 
> validation checker, we can't trust the whole item.

Agreed.

> So the item-specific info may not contain meaningful info.

I'd not expect more than repeating what was the condition that failed.
Something that would help me to match error message with the code and
how it 

Re: btrfs scrub crashes OS

2017-09-26 Thread Lukas Pirl
On 09/26/2017 11:36 AM, Qu Wenruo wrote as excerpted:
> This is strange, this means that we can't find a chunk map for a 72K
> length data extent.
> 
> Either the new mapper code has some bug, or it's a big problem.
> But I think it's more possible for former case.
> 
> Would you please try to dump the chunk tree (which should be quite
> small) using the following command?
> 
> $ btrfs inspect-internal dump-tree -t chunk 

Sure, happy to provide that:
  https://static.lukas-pirl.de/dump-chunk-tree.txt
(too large for Pastebin, file will probably go away in a couple of weeks).

Cheers,

Lukas
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 1/2] Btrfs: fix kernel oops while reading compressed data

2017-09-26 Thread David Sterba
On Tue, Sep 26, 2017 at 08:41:27AM +, Paul Jones wrote:
> > -Original Message-
> > From: linux-btrfs-ow...@vger.kernel.org [mailto:linux-btrfs-
> > ow...@vger.kernel.org] On Behalf Of David Sterba
> > Sent: Sunday, 24 September 2017 11:46 PM
> > To: Liu Bo 
> > Cc: linux-btrfs@vger.kernel.org
> > Subject: Re: [PATCH 1/2] Btrfs: fix kernel oops while reading compressed
> > data
> > 
> > On Wed, Sep 20, 2017 at 05:50:18PM -0600, Liu Bo wrote:
> > > The kernel oops happens at
> > >
> > > kernel BUG at fs/btrfs/extent_io.c:2104!
> > > ...
> > > RIP: clean_io_failure+0x263/0x2a0 [btrfs]
> > >
> > > It's showing that read-repair code is using an improper mirror index.
> > > This is due to the fact that compression read's endio hasn't recorded
> > > the failed mirror index in %cb->orig_bio.
> > >
> > > With this, btrfs's read-repair can work properly on reading compressed
> > > data.
> > >
> > > Signed-off-by: Liu Bo 
> > > Reported-by: Paul Jones 
> > 
> > Reviewed-by: David Sterba 
> 
> Tested-by: 
> For both patches.

Thanks for testing.

> I caused the same thing to happen again, this time by unplugging the
> wrong hard drive. Applied the patches and problem (BUG_ON) is gone.
> Should this also go to stable? Seems like a rather glaring problem to me.

Yes it should and will be forwarded there once it's merged to Linus'
tree.
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] btrfs-progs: Resolve memory-leak in btrfs qgroup show when quota disabled

2017-09-26 Thread David Sterba
On Tue, Sep 26, 2017 at 05:28:31PM +0800, Gu Jinxiang wrote:
> When quota disabled, btrfs qgroup show exit with a error message,
> but the allocated memory is not freed.
> 
> By the way, this bug marked as issue#20 in github.
> 
> Signed-off-by: Gu Jinxiang 

Applied, thanks.
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: btrfs scrub crashes OS

2017-09-26 Thread Qu Wenruo



On 2017年09月26日 17:26, Lukas Pirl wrote:

Hi Qu,

On 09/26/2017 10:51 AM, Qu Wenruo wrote as excerpted:

This make things more weird.
Just in case, are you executing offline scrub by "btrfs scrub start
--offline "


Yes. I even got some output (pretty sure the last lines are missing due
to the crash):

WARNING: Offline scrub doesn't support extra options other than -r
[I gave -d as well]
Invalid mapping for 644337258496-644337332224, got
645348196352-646421938176
Couldn't map the block 644337258496


This is strange, this means that we can't find a chunk map for a 72K 
length data extent.


Either the new mapper code has some bug, or it's a big problem.
But I think it's more possible for former case.

Would you please try to dump the chunk tree (which should be quite 
small) using the following command?


$ btrfs inspect-internal dump-tree -t chunk 


ERROR: failed to read out data at bytenr 644337258496 mirror 1
Invalid mapping for 653402148864-653402152960, got
653938130944-655011872768
Couldn't map the block 653402148864
ERROR: failed to read out data at bytenr 653402148864 mirror 1
Invalid mapping for 717315420160-717315526656, got
718362640384-719436382208
Couldn't map the block 717315420160
ERROR: failed to read out data at bytenr 717315420160 mirror 1
Invalid mapping for 875072008192-875072040960, got
875128946688-876202688512
Couldn't map the block 875072008192
ERROR: failed to read tree block 875072008192 mirror 1
ERROR: extent 875072008192 len 32768 CORRUPTED: all mirror(s)
corrupted, can't be recovered

Can I find out on which disk a mirror of a block is?


btrfs-map-logical can help you.
But I doubt if the offline scrub code, especially the new 
btrfs_map_block_v2() has hidden bug which caused the problem.


Withouth chunk tree dump, I am not which if it's a real bug or missing 
device.


Thanks,
Qu



If so, I think there may be some problem outside the btrfs territory.


Of course, that is a possibility…


Offline scrub has nothing to do with btrfs kernel module, it just reads
out on-disk data and verify checksum in *user* space.

So if offline scrub can also screw up the system, it means there is
something wrong in the disk IO routine, not btrfs.

And scrub can trigger it because normal btrfs IO won't try to read that
part/mirror.


…especially when considering this.


What about trying to read all data out of your raw disk?
If offline crashes the system, reading the disk may crash it also.
Using dd to read each of your disk (with btrfs unmounted) may expose
which disk caused the problem.


That it is good idea! Will go ahead.

Thanks for your help so far.
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH] btrfs-progs: Resolve memory-leak in btrfs qgroup show when quota disabled

2017-09-26 Thread Gu Jinxiang
When quota disabled, btrfs qgroup show exit with a error message,
but the allocated memory is not freed.

By the way, this bug marked as issue#20 in github.

Signed-off-by: Gu Jinxiang 
---
 cmds-qgroup.c | 9 ++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/cmds-qgroup.c b/cmds-qgroup.c
index 38382ea9..5fbfaa17 100644
--- a/cmds-qgroup.c
+++ b/cmds-qgroup.c
@@ -369,9 +369,8 @@ static int cmd_qgroup_show(int argc, char **argv)
path = argv[optind];
fd = btrfs_open_dir(path, , 1);
if (fd < 0) {
-   free(filter_set);
-   free(comparer_set);
-   return 1;
+   ret = 1;
+   goto out;
}
 
if (sync) {
@@ -406,6 +405,10 @@ static int cmd_qgroup_show(int argc, char **argv)
close_file_or_dir(fd, dirstream);
 
 out:
+   if (filter_set)
+   free(filter_set);
+   if (comparer_set)
+   free(comparer_set);
return !!ret;
 }
 
-- 
2.14.1



--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] Btrfs: compress_file_range() remove dead variable num_bytes

2017-09-26 Thread Nikolay Borisov


On 12.09.2017 00:15, Timofey Titovets wrote:
> Remove dead assigment of num_bytes
> 
> Also as num_bytes only used in will_compress block as
> copy of total_in just replace that with total_in and drop num_bytes entire
> 
> Signed-off-by: Timofey Titovets 

Reviewed-by: Nikolay Borisov 

> ---
>  fs/btrfs/inode.c | 10 +++---
>  1 file changed, 3 insertions(+), 7 deletions(-)
> 
> diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
> index 17ad018da0a2..1ff4fa461555 100644
> --- a/fs/btrfs/inode.c
> +++ b/fs/btrfs/inode.c
> @@ -446,7 +446,6 @@ static noinline void compress_file_range(struct inode 
> *inode,
>  {
>   struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
>   struct btrfs_root *root = BTRFS_I(inode)->root;
> - u64 num_bytes;
>   u64 blocksize = fs_info->sectorsize;
>   u64 actual_end;
>   u64 isize = i_size_read(inode);
> @@ -496,8 +495,6 @@ static noinline void compress_file_range(struct inode 
> *inode,
>  
>   total_compressed = min_t(unsigned long, total_compressed,
>   BTRFS_MAX_UNCOMPRESSED);
> - num_bytes = ALIGN(end - start + 1, blocksize);
> - num_bytes = max(blocksize,  num_bytes);
>   total_in = 0;
>   ret = 0;
>  
> @@ -613,7 +610,6 @@ static noinline void compress_file_range(struct inode 
> *inode,
>*/
>   total_in = ALIGN(total_in, PAGE_SIZE);
>   if (total_compressed + blocksize <= total_in) {
> - num_bytes = total_in;
>   *num_added += 1;
>  
>   /*
> @@ -621,12 +617,12 @@ static noinline void compress_file_range(struct inode 
> *inode,
>* allocation on disk for these compressed pages, and
>* will submit them to the elevator.
>*/
> - add_async_extent(async_cow, start, num_bytes,
> + add_async_extent(async_cow, start, total_in,
>   total_compressed, pages, nr_pages,
>   compress_type);
>  
> - if (start + num_bytes < end) {
> - start += num_bytes;
> + if (start + total_in < end) {
> + start += total_in;
>   pages = NULL;
>   cond_resched();
>   goto again;
> 
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: btrfs scrub crashes OS

2017-09-26 Thread Lukas Pirl
Hi Qu,

On 09/26/2017 10:51 AM, Qu Wenruo wrote as excerpted:
> This make things more weird.
> Just in case, are you executing offline scrub by "btrfs scrub start
> --offline "

Yes. I even got some output (pretty sure the last lines are missing due
to the crash):

WARNING: Offline scrub doesn't support extra options other than -r
[I gave -d as well]
Invalid mapping for 644337258496-644337332224, got
645348196352-646421938176
Couldn't map the block 644337258496
ERROR: failed to read out data at bytenr 644337258496 mirror 1
Invalid mapping for 653402148864-653402152960, got
653938130944-655011872768
Couldn't map the block 653402148864
ERROR: failed to read out data at bytenr 653402148864 mirror 1
Invalid mapping for 717315420160-717315526656, got
718362640384-719436382208
Couldn't map the block 717315420160
ERROR: failed to read out data at bytenr 717315420160 mirror 1
Invalid mapping for 875072008192-875072040960, got
875128946688-876202688512
Couldn't map the block 875072008192
ERROR: failed to read tree block 875072008192 mirror 1
ERROR: extent 875072008192 len 32768 CORRUPTED: all mirror(s)
corrupted, can't be recovered

Can I find out on which disk a mirror of a block is?

> If so, I think there may be some problem outside the btrfs territory.

Of course, that is a possibility…

> Offline scrub has nothing to do with btrfs kernel module, it just reads
> out on-disk data and verify checksum in *user* space.
> 
> So if offline scrub can also screw up the system, it means there is
> something wrong in the disk IO routine, not btrfs.
> 
> And scrub can trigger it because normal btrfs IO won't try to read that
> part/mirror.

…especially when considering this.

> What about trying to read all data out of your raw disk?
> If offline crashes the system, reading the disk may crash it also.
> Using dd to read each of your disk (with btrfs unmounted) may expose
> which disk caused the problem.

That it is good idea! Will go ahead.

Thanks for your help so far.
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v2] fstests: btrfs/150 regression test for reading compressed data

2017-09-26 Thread Eryu Guan
On Fri, Sep 22, 2017 at 05:21:27PM -0600, Liu Bo wrote:
> We had a bug in btrfs compression code which could end up with a
> kernel panic.
> 
> This is adding a regression test for the bug and I've also sent a
> kernel patch to fix the bug.
> 
> The patch is "Btrfs: fix kernel oops while reading compressed data".
> 
> Signed-off-by: Liu Bo 

Hmm, I can't reproduce the panic with 4.13 kernel, which doesn't have
the fix applied. Can you please help confirm if it panics on your test
environment?

> ---
> v2: - Fix ambiguous copyright.
> - Use /proc/$pid/make-it-fail to specify IO failure
> - Use bash -c to run test only when pid is odd.
> - Add test to dangerous group.
> 
>  tests/btrfs/150 | 103 
> 
>  tests/btrfs/150.out |   3 ++
>  tests/btrfs/group   |   1 +
>  3 files changed, 107 insertions(+)
>  create mode 100755 tests/btrfs/150
>  create mode 100644 tests/btrfs/150.out
> 
> diff --git a/tests/btrfs/150 b/tests/btrfs/150
> new file mode 100755
> index 000..8891c38
> --- /dev/null
> +++ b/tests/btrfs/150
> @@ -0,0 +1,103 @@
> +#! /bin/bash
> +# FS QA Test btrfs/150
> +#
> +# This is a regression test which ends up with a kernel oops in btrfs.
> +# It occurs when btrfs's read repair happens while reading a compressed
> +# extent.
> +# The patch to fix it is
> +#Btrfs: fix kernel oops while reading compressed data
> +#
> +#---
> +# Copyright (c) 2017 Oracle.  All Rights Reserved.
> +#
> +# This program is free software; you can redistribute it and/or
> +# modify it under the terms of the GNU General Public License as
> +# published by the Free Software Foundation.
> +#
> +# This program is distributed in the hope that it would be useful,
> +# but WITHOUT ANY WARRANTY; without even the implied warranty of
> +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> +# GNU General Public License for more details.
> +#
> +# You should have received a copy of the GNU General Public License
> +# along with this program; if not, write the Free Software Foundation,
> +# Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
> +#---
> +#
> +
> +seq=`basename $0`
> +seqres=$RESULT_DIR/$seq
> +echo "QA output created by $seq"
> +
> +here=`pwd`
> +tmp=/tmp/$$
> +status=1 # failure is the default!
> +trap "_cleanup; exit \$status" 0 1 2 3 15
> +
> +_cleanup()
> +{
> + cd /
> + rm -f $tmp.*
> +}
> +
> +# get standard environment, filters and checks
> +. ./common/rc
> +. ./common/filter
> +
> +# remove previous $seqres.full before test
> +rm -f $seqres.full
> +
> +# real QA test starts here
> +
> +# Modify as appropriate.
> +_supported_fs btrfs
> +_supported_os Linux
> +_require_scratch
> +_require_fail_make_request
> +_require_scratch_dev_pool 2 

Trailing whitespace in above line.

> +
> +SYSFS_BDEV=`_sysfs_dev $SCRATCH_DEV`
> +enable_io_failure()
> +{
> +echo 100 > $DEBUGFS_MNT/fail_make_request/probability
> +echo 1000 > $DEBUGFS_MNT/fail_make_request/times
> +echo 0 > $DEBUGFS_MNT/fail_make_request/verbose
> +echo 1 > $SYSFS_BDEV/make-it-fail
> +}
> +
> +disable_io_failure()
> +{
> +echo 0 > $SYSFS_BDEV/make-it-fail
> +echo 0 > $DEBUGFS_MNT/fail_make_request/probability
> +echo 0 > $DEBUGFS_MNT/fail_make_request/times
> +}
> +
> +_scratch_pool_mkfs "-d raid1 -b 1G" >> $seqres.full 2>&1
> +
> +# It doesn't matter which compression algorithm we use.
> +_scratch_mount -ocompress
> +
> +# Create a file with all data being compressed
> +$XFS_IO_PROG -f -c "pwrite -W 0 8K" $SCRATCH_MNT/foobar | _filter_xfs_io
> +
> +# Raid1 consists of two copies and btrfs decides which copy to read by 
> reader's
> +# %pid.  Now we inject errors to copy #1 and copy #0 is good.  We want to 
> read
> +# the bad copy to trigger read-repair.
> +while [[ -z $result ]]; do
> + # invalidate the page cache
> + $XFS_IO_PROG -f -c "fadvise -d 0 8K" $SCRATCH_MNT/foobar

Does 'echo 3 > /proc/sys/vm/drop_caches' work?

Thanks,
Eryu

> +
> + enable_io_failure
> +
> + result=$(bash -c "
> + if [ \$((\$\$ % 2)) == 1 ]; then
> + echo 1 > /proc/\$\$/make-it-fail
> + exec $XFS_IO_PROG -c \"pread 0 8K\" \$SCRATCH_MNT/foobar
> + fi")
> +
> + disable_io_failure
> +done
> +
> +# success, all done
> +status=0
> +exit
> diff --git a/tests/btrfs/150.out b/tests/btrfs/150.out
> new file mode 100644
> index 000..c492c24
> --- /dev/null
> +++ b/tests/btrfs/150.out
> @@ -0,0 +1,3 @@
> +QA output created by 150
> +wrote 8192/8192 bytes at offset 0
> +XXX Bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
> diff --git a/tests/btrfs/group b/tests/btrfs/group
> index 70c3f05..e73bb1b 100644
> --- a/tests/btrfs/group
> +++ b/tests/btrfs/group
> @@ -152,3 +152,4 @@
>  147 auto quick send
>  148 

Re: btrfs scrub crashes OS

2017-09-26 Thread Qu Wenruo



On 2017年09月26日 16:34, Lukas Pirl wrote:

Dear Qu,

thanks for your reply.

On 09/25/2017 12:19 PM, Qu Wenruo wrote as excerpted:

Even no dmesg output using tty or netconsole?


And thanks for the pointer to netconsole, I tried that one.
No success. I set netconsole up, verified it worked, started a scrub,
the machine went away after a couple of hours, netconsole empty.


Sad to hear that.
This means we have nothing to refer to, so it's really hard to continue 
investigating (if not impossible).





That's strange.
Normally it should be kernel BUG_ON() to cause such problem.
  
And if the system is still responsible (either from TTY or ssh), is

there anything strange like tons of IO or CPU usage?


I can't tell, the machine just disappears from the network. Dead. IIRC,
it was also all dead when I sat in front of it.


Btrfs-progs v4.13 should have fixed it.
As long as v4.13 btrfs check reports no error, its metadata should be
good.


I can try that one, if helpful.


You could try the out-of-tree offline scrub to do a full scrub of your
fs unmounted, so it won't crash your system (if nothing wrong happened)
https://github.com/gujx2017/btrfs-progs/tree/offline_scrub


Did that, machine crashed again.


This make things more weird.

Just in case, are you executing offline scrub by "btrfs scrub start 
--offline "


If so, I think there may be some problem outside the btrfs territory.

Offline scrub has nothing to do with btrfs kernel module, it just reads 
out on-disk data and verify checksum in *user* space.


So if offline scrub can also screw up the system, it means there is 
something wrong in the disk IO routine, not btrfs.


And scrub can trigger it because normal btrfs IO won't try to read that 
part/mirror.





    MIXED_BACKREF, BIG_METADATA, EXTENDED_IREF, SKINNY_METADATA, NO_HOLES


Only NO_HOLES is not ordinary, but shouldn't cause a problem.


Would it be sensible to turn that feature off using `btrfstune` (if
possible at all)?



Not possible, and I don't believe it's related to that feature.


Without kernel backtrace, it's tricky to locate the problem.
So I would recommend to use netconsole (IIRC more reliable, as I use it
on my test VM to capture the dying message) or TTY output to verify
there is no kernel message/backtrace.


Yeah I see we are in a tricky situation here.

I will try to scrub with autodefrag and compression deactivated. >
Could a full balance be of any help? At least to find out if it crashes
the machine as well?


According to your report, I think full balance may also crash your 
system, and may further crash your system every time you try to mount it.


So I won't recommend to do it.


What about trying to read all data out of your raw disk?
If offline crashes the system, reading the disk may crash it also.
Using dd to read each of your disk (with btrfs unmounted) may expose 
which disk caused the problem.


Thanks,
Qu



Cheers,

Lukas


Thanks,
Qu


    no quotas in use
    see also https://pastebin.com/4me6zDsN for more details
btrfs-progs v4.12
GNU/Linux 4.12.0-0.bpo.1-amd64 #1 SMP Debian 4.12.6-1~bpo9+1 x86_64

The question, obviously, is how can I make this fs "scrubable" again?
Are the errors found by btrfsck safe to repair using btrfsck or some
other tool?

Thank you so much in advance,

Lukas
--
To unsubscribe from this list: send the line "unsubscribe
linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html



--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v3 2/2] btrfs: fix BUG_ON in btrfs_init_new_device()

2017-09-26 Thread Anand Jain
Instead of BUG_ON return error to the caller. And handle the fail
condition by calling the abort transaction and going through the
error path.

Signed-off-by: Anand Jain 
---
v2: do not consolidate btrfs_abort_transaction()
v3: meld 2/3 and 3/3 from v2

 fs/btrfs/volumes.c | 14 +-
 1 file changed, 9 insertions(+), 5 deletions(-)

diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 9d64700cc9b6..4cb575fbf643 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -2399,7 +2399,10 @@ int btrfs_init_new_device(struct btrfs_fs_info *fs_info, 
const char *device_path
if (seeding_dev) {
sb->s_flags &= ~MS_RDONLY;
ret = btrfs_prepare_sprout(fs_info);
-   BUG_ON(ret); /* -ENOMEM */
+   if (ret) {
+   btrfs_abort_transaction(trans, ret);
+   goto error_trans;
+   }
}
 
device->fs_devices = fs_info->fs_devices;
@@ -2445,14 +2448,14 @@ int btrfs_init_new_device(struct btrfs_fs_info 
*fs_info, const char *device_path
mutex_unlock(_info->chunk_mutex);
if (ret) {
btrfs_abort_transaction(trans, ret);
-   goto error_trans;
+   goto error_sysfs;
}
}
 
ret = btrfs_add_device(trans, fs_info, device);
if (ret) {
btrfs_abort_transaction(trans, ret);
-   goto error_trans;
+   goto error_sysfs;
}
 
if (seeding_dev) {
@@ -2461,7 +2464,7 @@ int btrfs_init_new_device(struct btrfs_fs_info *fs_info, 
const char *device_path
ret = btrfs_finish_sprout(trans, fs_info);
if (ret) {
btrfs_abort_transaction(trans, ret);
-   goto error_trans;
+   goto error_sysfs;
}
 
/* Sprouting would change fsid of the mounted root,
@@ -2500,12 +2503,13 @@ int btrfs_init_new_device(struct btrfs_fs_info 
*fs_info, const char *device_path
update_dev_time(device_path);
return ret;
 
+error_sysfs:
+   btrfs_sysfs_rm_device_link(fs_info->fs_devices, device);
 error_trans:
if (seeding_dev)
sb->s_flags |= MS_RDONLY;
btrfs_end_transaction(trans);
rcu_string_free(device->name);
-   btrfs_sysfs_rm_device_link(fs_info->fs_devices, device);
kfree(device);
 error:
blkdev_put(bdev, FMODE_EXCL);
-- 
2.13.1

--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


RE: [PATCH 1/2] Btrfs: fix kernel oops while reading compressed data

2017-09-26 Thread Paul Jones
> -Original Message-
> From: linux-btrfs-ow...@vger.kernel.org [mailto:linux-btrfs-
> ow...@vger.kernel.org] On Behalf Of David Sterba
> Sent: Sunday, 24 September 2017 11:46 PM
> To: Liu Bo 
> Cc: linux-btrfs@vger.kernel.org
> Subject: Re: [PATCH 1/2] Btrfs: fix kernel oops while reading compressed
> data
> 
> On Wed, Sep 20, 2017 at 05:50:18PM -0600, Liu Bo wrote:
> > The kernel oops happens at
> >
> > kernel BUG at fs/btrfs/extent_io.c:2104!
> > ...
> > RIP: clean_io_failure+0x263/0x2a0 [btrfs]
> >
> > It's showing that read-repair code is using an improper mirror index.
> > This is due to the fact that compression read's endio hasn't recorded
> > the failed mirror index in %cb->orig_bio.
> >
> > With this, btrfs's read-repair can work properly on reading compressed
> > data.
> >
> > Signed-off-by: Liu Bo 
> > Reported-by: Paul Jones 
> 
> Reviewed-by: David Sterba 

Tested-by: 
For both patches.

I caused the same thing to happen again, this time by unplugging the wrong hard 
drive. Applied the patches and problem (BUG_ON) is gone.
Should this also go to stable? Seems like a rather glaring problem to me.

Paul.




--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v3 1/2] btrfs: undo writable when sprouting fails

2017-09-26 Thread Anand Jain
When new device is being added to seed FS, seed FS is marked writable,
but when we fail to bring in the new device, we missed to undo the
writable part. This patch fixes it.

Signed-off-by: Anand Jain 
---
v3: none
v2: add commit log

 fs/btrfs/volumes.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 0e8f16c305df..9d64700cc9b6 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -2501,6 +2501,8 @@ int btrfs_init_new_device(struct btrfs_fs_info *fs_info, 
const char *device_path
return ret;
 
 error_trans:
+   if (seeding_dev)
+   sb->s_flags |= MS_RDONLY;
btrfs_end_transaction(trans);
rcu_string_free(device->name);
btrfs_sysfs_rm_device_link(fs_info->fs_devices, device);
-- 
2.13.1

--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v3 0/2] fix bug in btrfs_init_new_device()

2017-09-26 Thread Anand Jain
1/2 fixes a bug which failed to reset writable when sprouting failed
2/2 fixes BUG_ON in btrfs_init_new_device()

Anand Jain (2):
  btrfs: undo writable when sprouting fails
  btrfs: fix BUG_ON in btrfs_init_new_device()

 fs/btrfs/volumes.c | 16 +++-
 1 file changed, 11 insertions(+), 5 deletions(-)

-- 
2.13.1

--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 2/3] btrfs: cleanup btrfs_init_new_device()

2017-09-26 Thread Anand Jain



The
sysfs rm stands though


  This is a preparatory patch for 3/3, at BUG_ON it needs an exit
without  the btrfs_sysfs_add_device_link() part.


In this case I'd rather you have this change in 3/3, because on its own
it doesn't really make sense and cannot easily be figured out if doing
git blame.


 Fixed in v3. Thanks for suggesting.

-Anand

--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: btrfs scrub crashes OS

2017-09-26 Thread Lukas Pirl
Dear Qu,

thanks for your reply.

On 09/25/2017 12:19 PM, Qu Wenruo wrote as excerpted:
> Even no dmesg output using tty or netconsole?

And thanks for the pointer to netconsole, I tried that one.
No success. I set netconsole up, verified it worked, started a scrub,
the machine went away after a couple of hours, netconsole empty.

> That's strange.
> Normally it should be kernel BUG_ON() to cause such problem.
>  
> And if the system is still responsible (either from TTY or ssh), is
> there anything strange like tons of IO or CPU usage?

I can't tell, the machine just disappears from the network. Dead. IIRC,
it was also all dead when I sat in front of it.

> Btrfs-progs v4.13 should have fixed it.
> As long as v4.13 btrfs check reports no error, its metadata should be
> good.

I can try that one, if helpful.

> You could try the out-of-tree offline scrub to do a full scrub of your
> fs unmounted, so it won't crash your system (if nothing wrong happened)
> https://github.com/gujx2017/btrfs-progs/tree/offline_scrub

Did that, machine crashed again.

>>    MIXED_BACKREF, BIG_METADATA, EXTENDED_IREF, SKINNY_METADATA, NO_HOLES
> 
> Only NO_HOLES is not ordinary, but shouldn't cause a problem.

Would it be sensible to turn that feature off using `btrfstune` (if
possible at all)?

> Without kernel backtrace, it's tricky to locate the problem.
> So I would recommend to use netconsole (IIRC more reliable, as I use it
> on my test VM to capture the dying message) or TTY output to verify
> there is no kernel message/backtrace.

Yeah I see we are in a tricky situation here.

I will try to scrub with autodefrag and compression deactivated.

Could a full balance be of any help? At least to find out if it crashes
the machine as well?

Cheers,

Lukas

> Thanks,
> Qu
> 
>>    no quotas in use
>>    see also https://pastebin.com/4me6zDsN for more details
>> btrfs-progs v4.12
>> GNU/Linux 4.12.0-0.bpo.1-amd64 #1 SMP Debian 4.12.6-1~bpo9+1 x86_64
>>
>> The question, obviously, is how can I make this fs "scrubable" again?
>> Are the errors found by btrfsck safe to repair using btrfsck or some
>> other tool?
>>
>> Thank you so much in advance,
>>
>> Lukas
>> -- 
>> To unsubscribe from this list: send the line "unsubscribe
>> linux-btrfs" in
>> the body of a message to majord...@vger.kernel.org
>> More majordomo info at  http://vger.kernel.org/majordomo-info.html
>>

--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v3 1/2] btrfs: undo writable when sprouting fails

2017-09-26 Thread Anand Jain
When new device is being added to seed FS, seed FS is marked writable,
but when we fail to bring in the new device, we missed to undo the
writable part. This patch fixes it.

Signed-off-by: Anand Jain 
---
 fs/btrfs/volumes.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 0e8f16c305df..9d64700cc9b6 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -2501,6 +2501,8 @@ int btrfs_init_new_device(struct btrfs_fs_info *fs_info, 
const char *device_path
return ret;
 
 error_trans:
+   if (seeding_dev)
+   sb->s_flags |= MS_RDONLY;
btrfs_end_transaction(trans);
rcu_string_free(device->name);
btrfs_sysfs_rm_device_link(fs_info->fs_devices, device);
-- 
2.13.1

--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v3 0/2] fix bug in btrfs_init_new_device()

2017-09-26 Thread Anand Jain
1/2 fixes a bug which failed to reset writable when sprouting failed
2/2 fixes BUG_ON in btrfs_init_new_device()

Anand Jain (2):
  btrfs: undo writable when sprouting fails
  btrfs: fix BUG_ON in btrfs_init_new_device()

 fs/btrfs/volumes.c | 16 +++-
 1 file changed, 11 insertions(+), 5 deletions(-)

-- 
2.13.1

--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v3 2/2] btrfs: fix BUG_ON in btrfs_init_new_device()

2017-09-26 Thread Anand Jain
Instead of BUG_ON return error to the caller. And handle the fail
condition by calling the abort transaction and going through the
error path.

Signed-off-by: Anand Jain 
---
 fs/btrfs/volumes.c | 14 +-
 1 file changed, 9 insertions(+), 5 deletions(-)

diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 9d64700cc9b6..4cb575fbf643 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -2399,7 +2399,10 @@ int btrfs_init_new_device(struct btrfs_fs_info *fs_info, 
const char *device_path
if (seeding_dev) {
sb->s_flags &= ~MS_RDONLY;
ret = btrfs_prepare_sprout(fs_info);
-   BUG_ON(ret); /* -ENOMEM */
+   if (ret) {
+   btrfs_abort_transaction(trans, ret);
+   goto error_trans;
+   }
}
 
device->fs_devices = fs_info->fs_devices;
@@ -2445,14 +2448,14 @@ int btrfs_init_new_device(struct btrfs_fs_info 
*fs_info, const char *device_path
mutex_unlock(_info->chunk_mutex);
if (ret) {
btrfs_abort_transaction(trans, ret);
-   goto error_trans;
+   goto error_sysfs;
}
}
 
ret = btrfs_add_device(trans, fs_info, device);
if (ret) {
btrfs_abort_transaction(trans, ret);
-   goto error_trans;
+   goto error_sysfs;
}
 
if (seeding_dev) {
@@ -2461,7 +2464,7 @@ int btrfs_init_new_device(struct btrfs_fs_info *fs_info, 
const char *device_path
ret = btrfs_finish_sprout(trans, fs_info);
if (ret) {
btrfs_abort_transaction(trans, ret);
-   goto error_trans;
+   goto error_sysfs;
}
 
/* Sprouting would change fsid of the mounted root,
@@ -2500,12 +2503,13 @@ int btrfs_init_new_device(struct btrfs_fs_info 
*fs_info, const char *device_path
update_dev_time(device_path);
return ret;
 
+error_sysfs:
+   btrfs_sysfs_rm_device_link(fs_info->fs_devices, device);
 error_trans:
if (seeding_dev)
sb->s_flags |= MS_RDONLY;
btrfs_end_transaction(trans);
rcu_string_free(device->name);
-   btrfs_sysfs_rm_device_link(fs_info->fs_devices, device);
kfree(device);
 error:
blkdev_put(bdev, FMODE_EXCL);
-- 
2.13.1

--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 2/3] btrfs: cleanup btrfs_init_new_device()

2017-09-26 Thread Nikolay Borisov


On 26.09.2017 10:57, Anand Jain wrote:
> 
>> The
>> sysfs rm stands though
> 
>  This is a preparatory patch for 3/3, at BUG_ON it needs an exit
> without  the btrfs_sysfs_add_device_link() part.

In this case I'd rather you have this change in 3/3, because on its own
it doesn't really make sense and cannot easily be figured out if doing
git blame.

> 
> -Anand
> 
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: Wrong device?

2017-09-26 Thread Duncan
linux-btrfs posted on Mon, 25 Sep 2017 19:11:01 +0300 as excerpted:

Three points first off:

1) Please use a name other than "linux-btrfs".  That can remain the email 
address, but a name to go with it would be nice.  (You can see how my 
name and email address differ, for instance.)

2) Please don't reply to existing threads with a totally different 
topic.  Proper reply-threading clients will see it's a reply to the other 
thread (it's in the headers) and display it as such, even if you change 
the subject line.  If it's a different topic, start a new thread, don't 
reply to an existing thread.

3) I'm not a dev, just another btrfs user and list regular.  So I won't 
attempt to address the real technical or complicated stuff, but I can try 
to reply to the easy stuff at least, freeing the devs for addressing the 
more complicated stuff if they see a bug they can fix or might already be 
working on.

As to the problem...

> I have 15 x 6 TB disks in md-raid (Because btrfs's raid6 was marked as
> not-for-real-use when I first time installed this machine)
> 
> Now I have got both problem twice.
> 
> At this last time I have 233 subvolums, and millions of files (all
> together)

Just a reminder prompted by seeing those numbers tho I'd guess you 
already have this covered...

Sysadmin's backups rule #1:  The true value of your data is defined not 
by any words /claiming/ value nor by what you use it for, as the machine 
doesn't care about that, but rather by the number of backups of it you 
consider it worth having.  No backups means you are defining the data to 
be of less value than the time/trouble/resources to make the backup, so 
loss is never a big deal, because either you either have a backup if you 
considered it important to you, or you already saved what you defined as 
more valuable to you than that data, the time, trouble and resources 
you'd have otherwise put into the backup.

(Similarly with the currency of those backups, only there it's the value 
of the data difference between your last backup and the working copy.  
Once the data in that difference is of more value than the time/trouble/
resources to update the backup, it'll be updated.  Otherwise, the data in 
that delta is obviously not valuable enough to be worth that trouble, and 
thus not valuable enough to be terribly worried about if lost.)

> Then filesystem went to read only with this dmesg:
> 
> [Sat Sep 23 07:25:28 2017] [ cut here ]
> [Sat Sep 23 07:25:28 2017] WARNING: CPU: 5 PID: 5431 at
> /build/linux-hwe-edge-CrMNv8/linux-hwe-edge-4.10.0/fs/btrfs/extent-
tree.c:6947
> __btrfs_free_extent.isra.61+0x2cb/0xeb0 [btrfs]
> [Sat Sep 23 07:25:28 2017] BTRFS: Transaction aborted (error -28)

> 4.10.0-26-generic #30~16.04.1-Ubuntu 

Note that this is the mainline btrfs development list, with btrfs still 
stabilizing, not yet fully stable and mature, so this list tends to be 
quite forward focused.  We recommend and best support the latest two 
mainline kernels in two tracks, current and LTS.  The current kernel is 
4.13, so on the current track, 4.13 and 4.12 are recommended and best 
supported.  On the LTS track, the coming 4.14 is scheduled to be an LTS, 
with 4.9 and 4.4 the two previous LTSs before that.

So the 4.9 LTS kernel series is supported, with 4.4 currently supported 
but on its way out and 4.14 on the way in.  And current track 4.13 and 
4.12 are supported, with 4.12 on the way out.

4.10 isn't an LTS kernel, and it's old enough it's already several 
kernels out of current support track.  So upgrading to current 4.13 or 
downgrading to 4.9 LTS series would be recommended.

Meanwhile, your distro is in a better position to support their kernels 
of /whatever/ version since they know what patches they've applied and 
what btrfs fixes they've backported... or not.

Of course we'll still try to help with 4.10, and it's not /too/ dated, 
but you can expect that you might get "does it still happen with a 
current kernel" type questions.

> [Sat Sep 23 07:25:28 2017] BTRFS: error (device sdb) in
> __btrfs_free_extent:6947: errno=-28 No space left [Sat Sep 23 07:25:28
> 2017] BTRFS: error (device sdb) in btrfs_drop_snapshot:9193: errno=-28
> No space left [Sat Sep 23 07:25:28 2017] BTRFS info (device sdb): forced
> readonly
> 
> 
> After a long googling (about more complex situations) I suddenly noticed
> "device sdb" WTF???  Filesystem is mounted from /dev/md3 (sdb is part of
> that mdraid) so btrfs should not even know anything about that /dev/sdb.
> 
> So maybe the error is more simple that I thought.  It tries to allocate
> more metadata from physical drive (not from /dev/md3 as it was supposed
> to do), so that "no space left on device" could be the REAL error...
> 
> But why it is doing so?  Does it help if I add some extra virtualization
> layer, like LVM?

Keep in mind that btrfs, unlike most other filesystems, can be multi-
device.  As such, it needs a way to track which devices are part of each 

Re: [PATCH 2/3] btrfs: cleanup btrfs_init_new_device()

2017-09-26 Thread Anand Jain



On 09/26/2017 03:57 PM, Anand Jain wrote:



The
sysfs rm stands though


  This is a preparatory patch for 3/3, at BUG_ON it needs an exit 
without  the btrfs_sysfs_add_device_link() part.


typo
s/btrfs_sysfs_add_device_link/btrfs_sysfs_rm_device_link/



-Anand
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 2/3] btrfs: cleanup btrfs_init_new_device()

2017-09-26 Thread Anand Jain



The
sysfs rm stands though


 This is a preparatory patch for 3/3, at BUG_ON it needs an exit 
without  the btrfs_sysfs_add_device_link() part.


-Anand
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v2 3/3] btrfs: fix BUG_ON in btrfs_init_new_device()

2017-09-26 Thread Anand Jain
Instead of BUG_ON return error to the caller. And handle the fail
condition by calling the abort transaction through the error path.

Signed-off-by: Anand Jain 
---
v2: do not consolidate btrfs_abort_transaction()
 fs/btrfs/volumes.c | 6 +-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 22eb81794375..4cb575fbf643 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -2399,7 +2399,10 @@ int btrfs_init_new_device(struct btrfs_fs_info *fs_info, 
const char *device_path
if (seeding_dev) {
sb->s_flags &= ~MS_RDONLY;
ret = btrfs_prepare_sprout(fs_info);
-   BUG_ON(ret); /* -ENOMEM */
+   if (ret) {
+   btrfs_abort_transaction(trans, ret);
+   goto error_trans;
+   }
}
 
device->fs_devices = fs_info->fs_devices;
@@ -2502,6 +2505,7 @@ int btrfs_init_new_device(struct btrfs_fs_info *fs_info, 
const char *device_path
 
 error_sysfs:
btrfs_sysfs_rm_device_link(fs_info->fs_devices, device);
+error_trans:
if (seeding_dev)
sb->s_flags |= MS_RDONLY;
btrfs_end_transaction(trans);
-- 
2.13.1

--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v2 1/3] btrfs: undo writable when sprouting fails

2017-09-26 Thread Anand Jain
When new device is being added to seed FS, seed FS is marked writable,
but when we fail to bring in the new device, we missed to undo the
writable part. This patch fixes it.

Signed-off-by: Anand Jain 
---
v2: add commit log
 fs/btrfs/volumes.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 0e8f16c305df..9d64700cc9b6 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -2501,6 +2501,8 @@ int btrfs_init_new_device(struct btrfs_fs_info *fs_info, 
const char *device_path
return ret;
 
 error_trans:
+   if (seeding_dev)
+   sb->s_flags |= MS_RDONLY;
btrfs_end_transaction(trans);
rcu_string_free(device->name);
btrfs_sysfs_rm_device_link(fs_info->fs_devices, device);
-- 
2.13.1

--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v2 0/3] fix bug in btrfs_init_new_device()

2017-09-26 Thread Anand Jain
1/3 fixes a bug which failed to reset writable when sprouting failed
2/3 is cleanup and preparatory to fix 3/3
3/3 fixes BUG_ON in btrfs_init_new_device()

v2:
 add commit log to 1/3
 do not consolidate btrfs_abort_transaction() in 2/3

Anand Jain (3):
  btrfs: undo writable when sprouting fails
  btrfs: cleanup btrfs_init_new_device()
  btrfs: fix BUG_ON in btrfs_init_new_device()

 fs/btrfs/volumes.c | 16 +++-
 1 file changed, 11 insertions(+), 5 deletions(-)

-- 
2.13.1

--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v2 2/3] btrfs: cleanup btrfs_init_new_device()

2017-09-26 Thread Anand Jain
Moves btrfs_abort_transaction() to the error goto and renames
error_trans to error_sysfs. This is a preparatory patch to
remove the BUG_ON() in btrfs_init_new_device().

Signed-off-by: Anand Jain 
---
 fs/btrfs/volumes.c | 10 +-
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 9d64700cc9b6..22eb81794375 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -2445,14 +2445,14 @@ int btrfs_init_new_device(struct btrfs_fs_info 
*fs_info, const char *device_path
mutex_unlock(_info->chunk_mutex);
if (ret) {
btrfs_abort_transaction(trans, ret);
-   goto error_trans;
+   goto error_sysfs;
}
}
 
ret = btrfs_add_device(trans, fs_info, device);
if (ret) {
btrfs_abort_transaction(trans, ret);
-   goto error_trans;
+   goto error_sysfs;
}
 
if (seeding_dev) {
@@ -2461,7 +2461,7 @@ int btrfs_init_new_device(struct btrfs_fs_info *fs_info, 
const char *device_path
ret = btrfs_finish_sprout(trans, fs_info);
if (ret) {
btrfs_abort_transaction(trans, ret);
-   goto error_trans;
+   goto error_sysfs;
}
 
/* Sprouting would change fsid of the mounted root,
@@ -2500,12 +2500,12 @@ int btrfs_init_new_device(struct btrfs_fs_info 
*fs_info, const char *device_path
update_dev_time(device_path);
return ret;
 
-error_trans:
+error_sysfs:
+   btrfs_sysfs_rm_device_link(fs_info->fs_devices, device);
if (seeding_dev)
sb->s_flags |= MS_RDONLY;
btrfs_end_transaction(trans);
rcu_string_free(device->name);
-   btrfs_sysfs_rm_device_link(fs_info->fs_devices, device);
kfree(device);
 error:
blkdev_put(bdev, FMODE_EXCL);
-- 
2.13.1

--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: Wrong device?

2017-09-26 Thread Lukas Pirl
On 09/25/2017 06:11 PM, linux-bt...@oh3mqu.pp.hyper.fi wrote as excerpted:
> After a long googling (about more complex situations) I suddenly
> noticed "device sdb" WTF???  Filesystem is mounted from /dev/md3 (sdb
> is part of that mdraid) so btrfs should not even know anything about
> that /dev/sdb.

I would be interested in explanations regarding this too. It happened
to me as well, that I was confused by /dev/sd* device paths being
printed by btrfs in the logs, even though it runs on /dev/md-*
(/dev/mapper/*) devices exclusively.

> PS. I have noticed another bug too, but I haven't tested it with
> lastest kernels after I noticed that it happens only with
> compression=lzo.  So maybe it is already fixed.  With gzip or none
> compression probem does not happens.  I have email server with about
> 0.5 TB volume. It is using Maildir so it contains huge amount of
> files.  Sometimes some files goes unreadable.  After server reset
> problematic file could be readable again (but not always)...
> 
> But weird thing is that unreadable file always seems to be
> dovecot.index.log.

Confirm this (non-reproducible) behavior on a VPS running Debian
4.5.4-1~bpo8+1.

Lukas

-- 
+49 174 940 74 71
GPG key available via key servers



signature.asc
Description: OpenPGP digital signature