[PATCH 1/3] xfstests: Fix SCRATCH_DEV_POOL handling

2014-04-08 Thread Lukas Czerner
With changes introduced in 667308dd97bf41382d4ab299fa5b56c235cfeb27
it is no longer possible to use SCRATCH_DEV_POOL variable because of
error:

 common/config: Error: $SCRATCH_DEV should be unset when
 $SCRATCH_DEV_POOL is set

This was because get_next_config() would get called twice and hence it
would complain on the second run that SCRATCH_DEV is already set. Fix
it by making sure that we call get_next_config() only once if there
are no sections in the config file.

Also make sure that we export SCRATCH_DEV in the case we're deducing it
from SCRATCH_DEV_POOL.

Signed-off-by: Lukas Czerner lczer...@redhat.com
Reported-by: Filipe David Manana fdman...@gmail.com
---
 common/config | 7 ++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/common/config b/common/config
index 4178c27..6fa18e2 100644
--- a/common/config
+++ b/common/config
@@ -360,6 +360,10 @@ parse_config_section() {
 }
 
 get_next_config() {
+   if [ ! -z $CONFIG_INCLUDED ]  ! $OPTIONS_HAVE_SECTIONS; then
+   return 0
+   fi
+
local OLD_FSTYP=$FSTYP
local OLD_MOUNT_OPTIONS=$MOUNT_OPTIONS
local OLD_MKFS_OPTIONS=$MKFS_OPTIONS
@@ -414,10 +418,11 @@ get_next_config() {
# to SCRATCH_DEV and rest to SCRATCH_DEV_POOL to maintain the backward 
compatibility
if [ ! -z $SCRATCH_DEV_POOL ]; then
if [ ! -z $SCRATCH_DEV ]; then
-   echo common/config: Error: \$SCRATCH_DEV should be 
unset when \$SCRATCH_DEV_POOL is set
+   echo common/config: Error: \$SCRATCH_DEV 
($SCRATCH_DEV) should be unset when \$SCRATCH_DEV_POOL ($SCRATCH_DEV_POOL) is 
set
exit 1
fi
SCRATCH_DEV=`echo $SCRATCH_DEV_POOL | awk '{print $1}'`
+   export SCRATCH_DEV
fi
 
echo $SCRATCH_DEV | grep -q :  /dev/null 21
-- 
1.8.3.1

--
To unsubscribe from this list: send the line unsubscribe linux-btrfs in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 2/3] xfstests: Unset SCRATCH_DEV when deduced from SCRATCH_DEV_POOL

2014-04-08 Thread Lukas Czerner
In the case that we already have sections in the config file we
have to make sure that we unset SCRATCH_DEV if it has been deduced from
the SCRATCH_DEV_POOL so that it does not complain about SCRATCH_DEV in
this case.

Signed-off-by: Lukas Czerner lczer...@redhat.com
---
 common/config | 8 +++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/common/config b/common/config
index 6fa18e2..3163801 100644
--- a/common/config
+++ b/common/config
@@ -372,10 +372,15 @@ get_next_config() {
unset MOUNT_OPTIONS
unset MKFS_OPTIONS
unset FSCK_OPTIONS
+   # We might have deduced SCRATCH_DEV from the SCRATCH_DEV_POOL in the 
previous
+   # run, so we have to unset it now.
+   if [ $SCRATCH_DEV_NOT_SET == true ]; then
+   unset SCRATCH_DEV
+   fi
 
parse_config_section $1
 
-   if [ -n $OLD_FSTYP ]  [ $OLD_FSTYP != $FSTYP ]; then
+   if [ ! -z $OLD_FSTYP ]  [ $OLD_FSTYP != $FSTYP ]; then
[ -z $MOUNT_OPTIONS ]  _mount_opts
[ -z $MKFS_OPTIONS ]  _mkfs_opts
[ -z $FSCK_OPTIONS ]  _fsck_opts
@@ -423,6 +428,7 @@ get_next_config() {
fi
SCRATCH_DEV=`echo $SCRATCH_DEV_POOL | awk '{print $1}'`
export SCRATCH_DEV
+   export SCRATCH_DEV_NOT_SET=true
fi
 
echo $SCRATCH_DEV | grep -q :  /dev/null 21
-- 
1.8.3.1

--
To unsubscribe from this list: send the line unsubscribe linux-btrfs in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 3/3] xfstests: Fix setting FSTYP automatically

2014-04-08 Thread Lukas Czerner
Currently if the FSTYP is not set, the code to get FSTYP using blikd
would not work. This is because we're using HOSTOS environment variable
which might not be set (at least not on my system) and because it's
already late in the code path.

Fix this by using OSTYP environment variable as a fallback in the case
that HOSTOS does not work and move the check to common/config.

Signed-off-by: Lukas Czerner lczer...@redhat.com
---
 check |  8 
 common/config | 18 +-
 2 files changed, 13 insertions(+), 13 deletions(-)

diff --git a/check b/check
index 8f1a6e1..ed1834d 100755
--- a/check
+++ b/check
@@ -58,14 +58,6 @@ then
 exit 1
 fi
 
-# Autodetect fs type based on what's on $TEST_DEV unless it's been set
-# externally
-if [ -z $FSTYP -a $HOSTOS == Linux ]; then
-FSTYP=`blkid -c /dev/null -s TYPE -o value $TEST_DEV`
-fi
-FSTYP=${FSTYP:=xfs}
-export FSTYP
-
 SUPPORTED_TESTS=[0-9][0-9][0-9] [0-9][0-9][0-9][0-9]
 SRC_GROUPS=generic shared
 export SRC_DIR=tests
diff --git a/common/config b/common/config
index 3163801..00249e6 100644
--- a/common/config
+++ b/common/config
@@ -297,11 +297,6 @@ _fsck_opts()
esac
 }
 
-[ -z $FSTYP ]  export FSTYP=xfs
-[ -z $MOUNT_OPTIONS ]  _mount_opts
-[ -z $MKFS_OPTIONS ]  _mkfs_opts
-[ -z $FSCK_OPTIONS ]  _fsck_opts
-
 known_hosts()
 {
[ $HOST_CONFIG_DIR ] || HOST_CONFIG_DIR=`pwd`/configs
@@ -446,6 +441,19 @@ get_next_config() {
 if [ -z $CONFIG_INCLUDED ]; then
get_next_config `echo $HOST_OPTIONS_SECTIONS | cut -f1 -d `
export CONFIG_INCLUDED=true
+
+   # Autodetect fs type based on what's on $TEST_DEV unless it's been set
+   # externally
+   if [ -z $FSTYP ]  \
+  [ $HOSTOS == Linux -o $OSTYPE == linux-gnu ]  \
+  [ ! -z $TEST_DEV ]; then
+   FSTYP=`blkid -c /dev/null -s TYPE -o value $TEST_DEV`
+   fi
+   FSTYP=${FSTYP:=xfs}
+   export FSTYP
+   [ -z $MOUNT_OPTIONS ]  _mount_opts
+   [ -z $MKFS_OPTIONS ]  _mkfs_opts
+   [ -z $FSCK_OPTIONS ]  _fsck_opts
 fi
 
 # make sure this script returns success
-- 
1.8.3.1

--
To unsubscribe from this list: send the line unsubscribe linux-btrfs in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 1/3] Btrfs-progs: move path modification to filters

2012-12-11 Thread Lukas Czerner
Commit 8e8e019e910f20947fea7eff5da40753639d8870 introduces -a option
which will list all subvolumes with distinguishing between relative and
absolute by prepending absolute patch with FS_TREE.

This commit moves the path modification to a filter code rather than
doing so in path construction in resolve_root(). This gives us more
flexibility in formatting path output.

Signed-off-by: Lukas Czerner lczer...@redhat.com
---
 btrfs-list.c |   32 +++-
 btrfs-list.h |1 +
 cmds-subvolume.c |   11 +--
 man/btrfs.8.in   |3 ++-
 4 files changed, 35 insertions(+), 12 deletions(-)

diff --git a/btrfs-list.c b/btrfs-list.c
index e5f0f96..77d99f8 100644
--- a/btrfs-list.c
+++ b/btrfs-list.c
@@ -628,15 +628,6 @@ static int resolve_root(struct root_lookup *rl, struct 
root_info *ri,
}
 
if (next == BTRFS_FS_TREE_OBJECTID) {
-   char p[] = FS_TREE;
-   add_len = strlen(p);
-   len = strlen(full_path);
-   tmp = malloc(len + add_len + 2);
-   memcpy(tmp + add_len + 1, full_path, len);
-   tmp[add_len] = '/';
-   memcpy(tmp, p, add_len);
-   free(full_path);
-   full_path = tmp;
ri-top_id = next;
break;
}
@@ -1176,6 +1167,28 @@ static int filter_topid_equal(struct root_info *ri, u64 
data)
return ri-top_id == data;
 }
 
+static int filter_full_path(struct root_info *ri, u64 data)
+{
+   if (ri-full_path  ri-top_id != data) {
+   char *tmp;
+   char p[] = FS_TREE;
+   int add_len = strlen(p);
+   int len = strlen(ri-full_path);
+
+   tmp = malloc(len + add_len + 2);
+   if (!tmp) {
+   fprintf(stderr, memory allocation failed\n);
+   exit(1);
+   }
+   memcpy(tmp + add_len + 1, ri-full_path, len);
+   tmp[add_len] = '/';
+   memcpy(tmp, p, add_len);
+   free(ri-full_path);
+   ri-full_path = tmp;
+   }
+   return 1;
+}
+
 static btrfs_list_filter_func all_filter_funcs[] = {
[BTRFS_LIST_FILTER_ROOTID]  = filter_by_rootid,
[BTRFS_LIST_FILTER_SNAPSHOT_ONLY]   = filter_snapshot,
@@ -1187,6 +1200,7 @@ static btrfs_list_filter_func all_filter_funcs[] = {
[BTRFS_LIST_FILTER_CGEN_LESS]   = filter_cgen_less,
[BTRFS_LIST_FILTER_CGEN_EQUAL]  = filter_cgen_equal,
[BTRFS_LIST_FILTER_TOPID_EQUAL] = filter_topid_equal,
+   [BTRFS_LIST_FILTER_FULL_PATH]   = filter_full_path,
 };
 
 struct btrfs_list_filter_set *btrfs_list_alloc_filter_set(void)
diff --git a/btrfs-list.h b/btrfs-list.h
index cde4b3c..f7fbea6 100644
--- a/btrfs-list.h
+++ b/btrfs-list.h
@@ -71,6 +71,7 @@ enum btrfs_list_filter_enum {
BTRFS_LIST_FILTER_CGEN_LESS,
BTRFS_LIST_FILTER_CGEN_MORE,
BTRFS_LIST_FILTER_TOPID_EQUAL,
+   BTRFS_LIST_FILTER_FULL_PATH,
BTRFS_LIST_FILTER_MAX,
 };
 
diff --git a/cmds-subvolume.c b/cmds-subvolume.c
index ac39f7b..37cb8cc 100644
--- a/cmds-subvolume.c
+++ b/cmds-subvolume.c
@@ -277,7 +277,9 @@ static const char * const cmd_subvol_list_usage[] = {
List subvolumes (and snapshots),
,
-p   print parent ID,
-   -a   print all the subvolumes in the filesystem.,
+   -a   print all the subvolumes in the filesystem and,
+distinguish absolute and relative path with respect,
+to the given path,
-u   print the uuid of subvolumes (and snapshots),
-t   print the result as a table,
-s   list snapshots only in the filesystem,
@@ -400,7 +402,12 @@ static int cmd_subvol_list(int argc, char **argv)
}
 
top_id = btrfs_list_get_path_rootid(fd);
-   if (!is_list_all)
+
+   if (is_list_all)
+   btrfs_list_setup_filter(filter_set,
+   BTRFS_LIST_FILTER_FULL_PATH,
+   top_id);
+   else
btrfs_list_setup_filter(filter_set,
BTRFS_LIST_FILTER_TOPID_EQUAL,
top_id);
diff --git a/man/btrfs.8.in b/man/btrfs.8.in
index 9222580..d6d8e94 100644
--- a/man/btrfs.8.in
+++ b/man/btrfs.8.in
@@ -124,7 +124,8 @@ and top level. The parent's ID may be used at mount time 
via the
 
 \fB-t\fP print the result as a table.
 
-\fB-a\fP print all the subvolumes in the filesystem.
+\fB-a\fP print all the subvolumes in the filesystem and distinguish between
+absolute and relative path with respect to the given path.
 
 \fB-r\fP only readonly subvolumes in the filesystem wille be listed.
 
-- 
1.7.7.6

[PATCH 3/3] Btrfs-progs: List all subvolumes by default

2012-12-11 Thread Lukas Czerner
Commit a1e89891eb6af5381539d9875b85c196150171b6 changed subvolume list
command so that we list only subvolumes under the specified directory.
However this is confusing and unnecessary obstacle, because one usually
want to see all subvolumes in the file system. It was introduced with
the notion the full_path may be invalid which is not exactly true as the
full_path is always relative to the root subvolume which makes perfect
sense.

Simply making option '-a' default is not enough since it introduces the
relative/absolute path distinction effectively obfuscating the subvolume
nesting.

This commit returns the subvolume list command behaviour before commit
a1e89891eb6af5381539d9875b85c196150171b6 where we list all subvolumes in
the filesystem with path naming from root subovolume. IMO this is the
best default as it is well understood and gives all the important
information about file system subvolumes including subvolume nesting
without the need to parse additional information.

Signed-off-by: Lukas Czerner lczer...@redhat.com
---
 btrfs-list.c |8 
 btrfs-list.h |2 +-
 cmds-subvolume.c |7 ---
 3 files changed, 9 insertions(+), 8 deletions(-)

diff --git a/btrfs-list.c b/btrfs-list.c
index 77d99f8..ac6e1f0 100644
--- a/btrfs-list.c
+++ b/btrfs-list.c
@@ -1283,12 +1283,11 @@ static void __filter_and_sort_subvol(struct root_lookup 
*all_subvols,
struct root_lookup *sort_tree,
struct btrfs_list_filter_set *filter_set,
struct btrfs_list_comparer_set *comp_set,
-   int fd)
+   u64 top_id)
 {
struct rb_node *n;
struct root_info *entry;
int ret;
-   u64 top_id = btrfs_list_get_path_rootid(fd);
 
root_lookup_init(sort_tree);
 
@@ -1457,11 +1456,12 @@ static void print_all_volume_info(struct root_lookup 
*sorted_tree,
 
 int btrfs_list_subvols(int fd, struct btrfs_list_filter_set *filter_set,
   struct btrfs_list_comparer_set *comp_set,
-  int is_tab_result)
+  int is_tab_result, int full_path)
 {
struct root_lookup root_lookup;
struct root_lookup root_sort;
int ret;
+   u64 top_id = (full_path ? 0 : btrfs_list_get_path_rootid(fd));
 
ret = __list_subvol_search(fd, root_lookup);
if (ret) {
@@ -1479,7 +1479,7 @@ int btrfs_list_subvols(int fd, struct 
btrfs_list_filter_set *filter_set,
return ret;
 
__filter_and_sort_subvol(root_lookup, root_sort, filter_set,
-comp_set, fd);
+comp_set, top_id);
 
print_all_volume_info(root_sort, is_tab_result);
__free_all_subvolumn(root_lookup);
diff --git a/btrfs-list.h b/btrfs-list.h
index f7fbea6..658e1ad 100644
--- a/btrfs-list.h
+++ b/btrfs-list.h
@@ -101,7 +101,7 @@ int btrfs_list_setup_comparer(struct 
btrfs_list_comparer_set **comp_set,
 
 int btrfs_list_subvols(int fd, struct btrfs_list_filter_set *filter_set,
   struct btrfs_list_comparer_set *comp_set,
-   int is_tab_result);
+  int is_tab_result, int full_path);
 int btrfs_list_find_updated_files(int fd, u64 root_id, u64 oldest_gen);
 int btrfs_list_get_default_subvolume(int fd, u64 *default_id);
 char *btrfs_list_path_for_root(int fd, u64 root);
diff --git a/cmds-subvolume.c b/cmds-subvolume.c
index 0aa7467..88158f0 100644
--- a/cmds-subvolume.c
+++ b/cmds-subvolume.c
@@ -310,7 +310,7 @@ static int cmd_subvol_list(int argc, char **argv)
char *subvol;
int is_tab_result = 0;
int is_list_all = 0;
-   int is_only_in_path = 1;
+   int is_only_in_path = 0;
struct option long_options[] = {
{sort, 1, NULL, 'S'},
{0, 0, 0, 0}
@@ -418,7 +418,8 @@ static int cmd_subvol_list(int argc, char **argv)
top_id);
 
ret = btrfs_list_subvols(fd, filter_set, comparer_set,
-   is_tab_result);
+   is_tab_result,
+   !is_list_all  !is_only_in_path);
if (ret)
return 19;
return 0;
@@ -624,7 +625,7 @@ static int cmd_subvol_get_default(int argc, char **argv)
btrfs_list_setup_filter(filter_set, BTRFS_LIST_FILTER_ROOTID,
default_id);
 
-   ret = btrfs_list_subvols(fd, filter_set, NULL, 0);
+   ret = btrfs_list_subvols(fd, filter_set, NULL, 0, 1);
if (ret)
return 19;
return 0;
-- 
1.7.7.6

--
To unsubscribe from this list: send the line unsubscribe linux-btrfs in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 2/3] Btrfs-progs: add '-o' option into subvolume list command

2012-12-11 Thread Lukas Czerner
This commit introduces new option '-o' to list only subvolumes under the
specified path. This does not change subvolume list  behaviour. It has
been default in the past and it is even with this commit.

Signed-off-by: Lukas Czerner lczer...@redhat.com
---
 cmds-subvolume.c |   11 ---
 man/btrfs.8.in   |6 --
 2 files changed, 12 insertions(+), 5 deletions(-)

diff --git a/cmds-subvolume.c b/cmds-subvolume.c
index 37cb8cc..0aa7467 100644
--- a/cmds-subvolume.c
+++ b/cmds-subvolume.c
@@ -272,7 +272,7 @@ out:
 }
 
 static const char * const cmd_subvol_list_usage[] = {
-   btrfs subvolume list [-apurts] [-g [+|-]value] [-c [+|-]value] 
+   btrfs subvolume list [-aopurts] [-g [+|-]value] [-c [+|-]value] 
[--sort=gen,ogen,rootid,path] path,
List subvolumes (and snapshots),
,
@@ -280,6 +280,7 @@ static const char * const cmd_subvol_list_usage[] = {
-a   print all the subvolumes in the filesystem and,
 distinguish absolute and relative path with respect,
 to the given path,
+   -o   print only subvolumes bellow specified path,
-u   print the uuid of subvolumes (and snapshots),
-t   print the result as a table,
-s   list snapshots only in the filesystem,
@@ -309,6 +310,7 @@ static int cmd_subvol_list(int argc, char **argv)
char *subvol;
int is_tab_result = 0;
int is_list_all = 0;
+   int is_only_in_path = 1;
struct option long_options[] = {
{sort, 1, NULL, 'S'},
{0, 0, 0, 0}
@@ -320,7 +322,7 @@ static int cmd_subvol_list(int argc, char **argv)
optind = 1;
while(1) {
c = getopt_long(argc, argv,
-   apsurg:c:t, long_options, NULL);
+   aopsurg:c:t, long_options, NULL);
if (c  0)
break;
 
@@ -331,6 +333,9 @@ static int cmd_subvol_list(int argc, char **argv)
case 'a':
is_list_all = 1;
break;
+   case 'o':
+   is_only_in_path = 1;
+   break;
case 't':
is_tab_result = 1;
break;
@@ -407,7 +412,7 @@ static int cmd_subvol_list(int argc, char **argv)
btrfs_list_setup_filter(filter_set,
BTRFS_LIST_FILTER_FULL_PATH,
top_id);
-   else
+   else if (is_only_in_path)
btrfs_list_setup_filter(filter_set,
BTRFS_LIST_FILTER_TOPID_EQUAL,
top_id);
diff --git a/man/btrfs.8.in b/man/btrfs.8.in
index d6d8e94..1e314eb 100644
--- a/man/btrfs.8.in
+++ b/man/btrfs.8.in
@@ -11,7 +11,7 @@ btrfs \- control a btrfs filesystem
 .PP
 \fBbtrfs\fP \fBsubvolume create\fP\fI [dest/]name\fP
 .PP
-\fBbtrfs\fP \fBsubvolume list\fP\fI [-aprts] [-g [+|-]value] [-c [+|-]value] 
[--rootid=rootid,gen,ogen,path] path\fP
+\fBbtrfs\fP \fBsubvolume list\fP\fI [-aoprts] [-g [+|-]value] [-c [+|-]value] 
[--rootid=rootid,gen,ogen,path] path\fP
 .PP
 \fBbtrfs\fP \fBsubvolume set-default\fP\fI id path\fP
 .PP
@@ -108,7 +108,7 @@ Create a subvolume in \fIdest\fR (or in the current 
directory if
 \fIdest\fR is omitted).
 .TP
 
-\fBsubvolume list\fR\fI [-aprts][-g [+|-]value] [-c [+|-]value] 
[--sort=gen,ogen,rootid,path] path\fR
+\fBsubvolume list\fR\fI [-aoprts][-g [+|-]value] [-c [+|-]value] 
[--sort=gen,ogen,rootid,path] path\fR
 .RS
 List the subvolumes present in the filesystem \fIpath\fR. For every
 subvolume the following information is shown by default.
@@ -127,6 +127,8 @@ and top level. The parent's ID may be used at mount time 
via the
 \fB-a\fP print all the subvolumes in the filesystem and distinguish between
 absolute and relative path with respect to the given path.
 
+\fB-o\fP print only subvolumes bellow specified path.
+
 \fB-r\fP only readonly subvolumes in the filesystem wille be listed.
 
 \fB-s\fP only snapshot subvolumes in the filesystem will  be listed.
-- 
1.7.7.6

--
To unsubscribe from this list: send the line unsubscribe linux-btrfs in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH] btrfs: get the device in write mode when deleting it

2012-12-07 Thread Lukas Czerner
When we're deleting the device we should get it in write mode since
we're going to re-write the super block magic on that device. And it
should fail if the device is read-only.

Signed-off-by: Lukas Czerner lczer...@redhat.com
---
 fs/btrfs/volumes.c |2 +-
 1 files changed, 1 insertions(+), 1 deletions(-)

diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 95c6f7d..e3b9b36 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -1384,7 +1384,7 @@ int btrfs_rm_device(struct btrfs_root *root, char 
*device_path)
goto out;
}
} else {
-   bdev = blkdev_get_by_path(device_path, FMODE_READ | FMODE_EXCL,
+   bdev = blkdev_get_by_path(device_path, FMODE_WRITE | FMODE_EXCL,
  root-fs_info-bdev_holder);
if (IS_ERR(bdev)) {
ret = PTR_ERR(bdev);
-- 
1.7.7.6

--
To unsubscribe from this list: send the line unsubscribe linux-btrfs in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH] btrfs: Notify udev when removing device

2012-12-06 Thread Lukas Czerner
Currently udev does not know about the device being removed from the
file system. This may result in the situation where we're unable to
mount the file system by UUID or by LABEL because the by-uuid and
by-label links may still point to the device which is no longer part of
the btrfs file system and hence does not have any btrfs super block.

It can be easily reproduced by the following:

mkfs.btrfs -L bugfs /dev/loop[0-6]
mount /dev/loop0 /mnt/test
btrfs device delete /dev/loop0 /mnt/test
umount /mnt/test

mount LABEL=bugfs /mnt/test  this fails

then see:

ls -l /dev/disk/by-label/bugfs

which will still point to the /dev/loop0

We did not noticed this before because libblkid would send the udev
event for us when it notice that the link does not fit the reality,
however it does not do that anymore and completely relies on udev
information.

Fix this by sending the KOBJ_CHANGE event to the bdev kobject after
successful device removal.

Note that this does not affect device addition, because we will open the
device prior the addition from userspace and udev will notice that and
reread the device afterwards.

Signed-off-by: Lukas Czerner lczer...@redhat.com
---
 fs/btrfs/volumes.c |   16 
 1 files changed, 16 insertions(+), 0 deletions(-)

diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 0f5ebb7..95c6f7d 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -71,6 +71,19 @@ static void free_fs_devices(struct btrfs_fs_devices 
*fs_devices)
kfree(fs_devices);
 }
 
+static void btrfs_kobject_uevent(struct block_device *bdev,
+enum kobject_action action)
+{
+   int ret;
+
+   ret = kobject_uevent(disk_to_dev(bdev-bd_disk)-kobj, action);
+   if (ret)
+   pr_warn(Sending event '%d' to kobject: '%s' (%p): failed\n,
+   action,
+   kobject_name(disk_to_dev(bdev-bd_disk)-kobj),
+   disk_to_dev(bdev-bd_disk)-kobj);
+}
+
 void btrfs_cleanup_fs_uuids(void)
 {
struct btrfs_fs_devices *fs_devices;
@@ -1493,6 +1506,9 @@ int btrfs_rm_device(struct btrfs_root *root, char 
*device_path)
 
ret = 0;
 
+   /* Notify udev that device has changed */
+   btrfs_kobject_uevent(bdev, KOBJ_CHANGE);
+
 error_brelse:
brelse(bh);
 error_close:
-- 
1.7.7.6

--
To unsubscribe from this list: send the line unsubscribe linux-btrfs in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH] btrfs: Return EINVAL when length to trim is less than FSB

2012-10-16 Thread Lukas Czerner
Currently if len argument in btrfs_ioctl_fitrim() is smaller than
one FSB we will continue and finally return 0 bytes discarded.
However if the length to discard is smaller then file system block
we should really return EINVAL.

Signed-off-by: Lukas Czerner lczer...@redhat.com
---
 fs/btrfs/ioctl.c |3 ++-
 1 files changed, 2 insertions(+), 1 deletions(-)

diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 6116880..3b8b509 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -343,7 +343,8 @@ static noinline int btrfs_ioctl_fitrim(struct file *file, 
void __user *arg)
return -EOPNOTSUPP;
if (copy_from_user(range, arg, sizeof(range)))
return -EFAULT;
-   if (range.start  total_bytes)
+   if (range.start  total_bytes ||
+   range.len  fs_info-sb-s_blocksize)
return -EINVAL;
 
range.len = min(range.len, total_bytes - range.start);
-- 
1.7.7.6

--
To unsubscribe from this list: send the line unsubscribe linux-btrfs in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: getdents - ext4 vs btrfs performance

2012-03-14 Thread Lukas Czerner
On Tue, 13 Mar 2012, Ted Ts'o wrote:

 On Tue, Mar 13, 2012 at 04:22:52PM -0400, Phillip Susi wrote:
  
  I think a format change would be preferable to runtime sorting.
 
 Are you volunteering to spearhead the design and coding of such a
 thing?  Run-time sorting is backwards compatible, and a heck of a lot
 easier to code and test...

Actually I am in the process of creating some projects for the local
universities and this certainly sounds like something that some student
can do. I have already put that into the proposal, so we might have
someone looking into this in the near future. Since the problem has been
there since forever, I think that it's not that critical to solve it
right away.

I kind of like the idea about having the separate btree with inode
numbers for the directory reading, just because it does not affect
allocation policy nor the write performance which is a good thing. Also
it has been done before in btrfs and it works very well for them. The
only downside (aside from format change) is the extra step when removing
the directory entry, but the positives outperform the negatives.


Maybe this might be even done in a way which does not require format
change. We can have new inode flag (say EXT4_INUM_INDEX_FL) which will
tell us that there is a inode number btree to use on directory reads.
Then the pointer to the root of that tree would be stored at the end of
the root block of the hree (of course if there is still some space for
it) and the rest is easy.

So If we mount the old file system with the new code, there will not be
the flag, but with newly added directories it could be used anyway. Also
e2fsck could easily add the inumber tree into the directory if it is
missing.

If we mount the new file system with the old code (the one which does
not know about this feature), everything would stay the same and we
would not touch the inumber tree at all. Of course there is a
possibility that we would rewrite the end of the root htree, overwriting
the pointer to the inumber tree and effectively losing one block. We
would not actually care that we rewrote this information because we
do not actually need it, so the only downside is the one block lost,
which is not that bad I guess.

Now, we have rewritten the pointer to the inumber tree and we're going
to mount it with the new code again. It would expect the the inumber
tree to be there, but not blindly. Some king of magic information would
have to be stored in the inumber root pointer so we can be sure that it
really is what we're looking for and if it is not there, well then we do
not care and walk the directory in the old way. And we can alway create
the inumber tree in the new directory. And again e2fsck should be able
to fix that for us as well.

So that is just an idea, I have not looked at the actual code to see it
it would be really possible, but it certainly seems like so. Am I
missing something ?


Anyway, if there is going to be a format change I agree that having
solution for those who are not comfortable with the format change is
equally as important. But maybe there will be better solution which does
not require the format change.

Thanks!
-Lukas

 
 The reality is we'd probably want to implement run-time sorting
 *anyway*, for the vast majority of people who don't want to convert to
 a new incompatible file system format.  (Even if you can do the
 conversion using e2fsck --- which is possible, but it would be even
 more code to write --- system administrators tend to be very
 conservative about such things, since they might need to boot an older
 kernel, or use a rescue CD that doesn't have an uptodate kernel or
 file system utilities, etc.)
 
  So the index nodes contain the hash ranges for the leaf block, but
  the leaf block only contains the regular directory entries, not a
  hash for each name?  That would mean that adding or removing names
  would require moving around the regular directory entries wouldn't
  it?
 
 They aren't sorted in the leaf block, so we only need to move around
 regular directory entries when we do a node split (and at the moment
 we don't support shrinking directories), so we don't have to worry the
 reverse case.
 
  I would think that hash collisions are rare enough that reading a
  directory block you end up not needing once in a blue moon would be
  chalked up under who cares.  So just stick with hash, offset pairs
  to map the hash to the normal directory entry.
 
 With a 64-bit hash, and if we were actually going to implement this as
 a new incompatible feature, you're probably right in terms of
 accepting the extra directory block search.
 
 We would still have to implement the case where hash collisions *do*
 exist, though, and make sure the right thing happens in that case.
 Even if the chance of that happening is 1 in 2**32, with enough
 deployed systems (i.e., every Android handset, etc.) it's going to
 happen in real life.
 
   - Ted
--
To unsubscribe 

Re: getdents - ext4 vs btrfs performance

2012-03-14 Thread Lukas Czerner
On Wed, 14 Mar 2012, Ted Ts'o wrote:

 On Wed, Mar 14, 2012 at 09:12:02AM +0100, Lukas Czerner wrote:
  I kind of like the idea about having the separate btree with inode
  numbers for the directory reading, just because it does not affect
  allocation policy nor the write performance which is a good thing. Also
  it has been done before in btrfs and it works very well for them. The
  only downside (aside from format change) is the extra step when removing
  the directory entry, but the positives outperform the negatives.
 
 Well, there will be extra (journaled!) block reads and writes involved
 when adding or removing directory entries.  So the performance on
 workloads that do a large number of directory adds and removed will
 have to be impacted.  By how much is not obvious, but it will be
 something which is measurable.

Yes, it would have to be measured in various workloads.

 
  Maybe this might be even done in a way which does not require format
  change. We can have new inode flag (say EXT4_INUM_INDEX_FL) which will
  tell us that there is a inode number btree to use on directory reads.
  Then the pointer to the root of that tree would be stored at the end of
  the root block of the hree (of course if there is still some space for
  it) and the rest is easy.
 
 You can make it be a RO_COMPAT change instead of an INCOMPAT change,
 yes.

Does it have to be RO_COMPAT change though ? Since this would be both
forward and backward compatible.

 
 And if you want to do it as simply as possible, we could just recycle
 the current htree approach for the second tree, and simply store the
 root in another set of directory blocks.  But by putting the index
 nodes in the directory blocks, masquerading as deleted directories, it
 means that readdir() has to cycle through them and ignore the index
 blocks.
 
 The alternate approach is to use physical block numbers instead of
 logical block numbers for the index blocks, and to store it separately
 from the blocks containing actual directory entries.  But if we do
 that for the inumber tree, the next question that arise is maybe we
 should do that for the hash tree as well --- and then once you upon
 that can of worms, it gets a lot more complicated.
 
 So the question that really arises here is how wide open do we want to
 leave the design space, and whether we are optimizing for the best
 possible layout change ignoring the amount of implementation work it
 might require, or whether we keep things as simple as possible from a
 code change perspective.

What do you expect to gain by storing the index nodes outside the
directory entries ? Only to avoid reading the index nodes of both trees
when walking just one of it ? If I understand it correctly directory
blocks are laid out sequentially so reading it in cold cache state would
not pose much troubles I think. Nor the iteration through all of the
index nodes. However it would have to be measured first.

 
 There are good arguments that can be made either way, and a lot
 depends on the quality of the students you can recruit, the amount of
 time they have, and how much review time it will take out of the core
 team during the design and implementation phase.

I am always hoping for the best :). We have had a good expedience with
the students here, but you'll never know that in advance. They'll have
the whole year to get through this, which is plenty of thine, but the
question of course is whether they'll use that time wisely :). But
anyway, I think it is definitely worth a try.

Thanks!
-Lukas

 
 Regards,
 
   - Ted
 

-- 
--
To unsubscribe from this list: send the line unsubscribe linux-btrfs in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: getdents - ext4 vs btrfs performance

2012-02-29 Thread Lukas Czerner
On Wed, 29 Feb 2012, Chris Mason wrote:

 On Wed, Feb 29, 2012 at 02:31:03PM +0100, Jacek Luczak wrote:
  Hi All,
  
  Long story short: We've found that operations on a directory structure
  holding many dirs takes ages on ext4.
  
  The Question: Why there's that huge difference in ext4 and btrfs? See
  below test results for real values.
  
  Background: I had to backup a Jenkins directory holding workspace for
  few projects which were co from svn (implies lot of extra .svn dirs).
  The copy takes lot of time (at least more than I've expected) and
  process was mostly in D (disk sleep). I've dig more and done some
  extra test to see if this is not a regression on block/fs site. To
  isolate the issue I've also performed same tests on btrfs.
  
  Test environment configuration:
  1) HW: HP ProLiant BL460 G6, 48 GB of memory, 2x 6 core Intel X5670 HT
  enabled, Smart Array P410i, RAID 1 on top of 2x 10K RPM SAS HDDs.
  2) Kernels: All tests were done on following kernels:
   - 2.6.39.4-3 -- the build ID (3) is used here for internal tacking of
  config changes mostly. In -3 we've introduced ,,fix readahead pipeline
  break caused by block plug'' patch. Otherwise it's pure 2.6.39.4.
   - 3.2.7 -- latest kernel at the time of testing (3.2.8 has been
  release recently).
  3) A subject of tests, directory holding:
   - 54GB of data (measured on ext4)
   - 1978149 files
   - 844008 directories
  4) Mount options:
   - ext4 -- errors=remount-ro,noatime,data=writeback
   - btrfs -- noatime,nodatacow and for later investigation on
  copression effect: noatime,nodatacow,compress=lzo
 
 For btrfs, nodatacow and compression don't really mix.  The compression
 will just override it. (Just FYI, not really related to these results).
 
  
  In all tests I've been measuring time of execution. Following tests
  were performed:
  - find . -type d
  - find . -type f
  - cp -a
  - rm -rf
  
  Ext4 results:
  | Type | 2.6.39.4-3   | 3.2.7
  | Dir cnt  | 17m 40sec  | 11m 20sec
  | File cnt |  17m 36sec | 11m 22sec
  | Copy| 1h 28m| 1h 27m
  | Remove| 3m 43sec
 
 Are the btrfs numbers missing? ;)
 
 In order for btrfs to be faster for cp -a, the files probably didn't
 change much since creation.  Btrfs maintains extra directory indexes
 that help in sequential backup scans, but this usually means slower
 delete performance.

Exactly and IIRC ext4 have directory entries stored in hash order which
does not really help the sequential access.

 
 But, how exactly did you benchmark it?  If you compare a fresh
 mkfs.btrfs where you just copied all the data over with an ext4 FS that
 has been on the disk for a long time, it isn't quite fair to ext4.

I have the same question, note that if the files on ext4 has been worked
with it may very well be that directory hash trees are not in very good
shape. You can attempt to optimize that by e2fsck (just run fsck.ext4 -f
device) but that may take quite some time and memory, but it is worth
trying.

Thanks!
-Lukas

 
 -chris
 --
 To unsubscribe from this list: send the line unsubscribe linux-ext4 in
 the body of a message to majord...@vger.kernel.org
 More majordomo info at  http://vger.kernel.org/majordomo-info.html
 

-- 
--
To unsubscribe from this list: send the line unsubscribe linux-btrfs in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] xfstests: fix 251's cp -axT problem

2012-01-11 Thread Lukas Czerner
On Wed, 11 Jan 2012, Christoph Hellwig wrote:

 On Tue, Jan 10, 2012 at 07:39:20PM +0800, Liu Bo wrote:
  When I ran xfstests, 251 got failed cause cp -axT did not work as wish:
  cp: cannot overwrite directory `/mnt/scratch/1' with non-directory
  
  With this patch, 251 has passed.
 
 Why would cp give that message with a missing /?
 
 I'm not against putting this in, but I'd like to understand what's going
 on.
 
 Lukas, any idea?
 

Hi Christoph,

the only reason I can think of is probably that Liu is accessing the
xfstests directory via symbolic link, hence the '$content' addresses the
symbolic link and cp is trying to overwrite the directory with
non-directory (symlink).

The fix is fine for both cases (xfstests as symlink and directory), confirmed
with a simple test.

Thanks!
-Lukas
--
To unsubscribe from this list: send the line unsubscribe linux-btrfs in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


System Storage Manager

2011-12-07 Thread Lukas Czerner
Hello everyone,

I would like to introduce to you a new tool called System Storage
Manager (ssm). It is supposed to provide easy to use command line
interface to manage your storage using various technologies like
lvm, btrfs, encrypted volumes and possibly more.

Background
--

In more sophisticated enterprise storage environments, management with
Device Mapper (dm), Logical Volume Manager (LVM), or Multiple Devices
(md) is becoming increasingly more difficult. With file systems added
to the mix, the number of tools needed to configure and manage storage
has grown so large that it is simply not user friendly. With so many
options for a system administrator to consider, the opportunity for
errors and problems is large.

The btrfs administration tools have shown us that storage management can
be simplified, and we are working to bring that ease of use to Linux
filesystems in general.


You can also find some more information in my presentation from LinuxCon
Prague this year:

http://people.redhat.com/lczerner/files/lczerner_fsm.pdf


The code is still under development and no release has been made yet, but
I would like to share with you what I have done so far, since the
progress has been a bit slower than I have previously expected. The
project lives on the sf.net :

https://sourceforge.net/projects/storagemanager/

and you can grab source files from git repository here:

https://sourceforge.net/p/storagemanager/code/ci/a1a5fd616d06030f94b9d2e80ee6ebcad09ad35f/tree/

More information can be found on the projects home page, or in the
README file.

https://sourceforge.net/p/storagemanager/home/Home/


Notes
-

- It is written in python
- So far it supports commands : check, resize, create, list, add, remove
- More commands to come : mirror, snapshot(!)
- It does not support raid yet, except raid 0 from lvm (striped volume)
- It has been tested with python 2.7, but would like to make it work on
  python 2.6 as well.
- It comes with some doctests, unittests and regression tests written in
  bash (although it is for lvm only so far)
- Its modular design should make it relatively simple to add support
  for more back-ends other than lvm, or btrfs


Things to be done before the actual release
---

- Create btrfs bash tests
- Create btrfs unittests
- Extend python unittests to other backends
- Use wipefs -a before using the device in add()
- Consider using wipefs -a after removing the device
- Remove the physical volume after it is removed from the group
- Figure out how to create better pool names so it is unique in the system and
  between the systems.
- Add mirror support
- Add snapshots support
- Add raid support
- use lsblk and blkid to get information
- Better table alignment when the output spans multiple lines
- Better error handling - not just plain Exception, but rather named exception
  and handle it as main() in ssm module
- Update readme
- Add more documentation into the code


Or course any comment or ideas would be highly appreciated. Please
report bugs directly to me.

Thanks!
-Lukas
--
To unsubscribe from this list: send the line unsubscribe linux-btrfs in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH] btrfs: return EINVAL if start total_bytes in fitrim ioctl

2011-09-05 Thread Lukas Czerner
We should retirn EINVAL if the start is beyond the end of the file
system in the btrfs_ioctl_fitrim(). Fix that by adding the appropriate
check for it.

Also in the btrfs_trim_fs() it is possible that len+start might overflow
if big values are passed. Fix it by decrementing the len so that start+len
is equal to the file system size in the worst case.

Signed-off-by: Lukas Czerner lczer...@redhat.com
---
 fs/btrfs/ioctl.c |6 +-
 1 files changed, 5 insertions(+), 1 deletions(-)

diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 970977a..ad890ee 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -277,6 +277,7 @@ static noinline int btrfs_ioctl_fitrim(struct file *file, 
void __user *arg)
struct fstrim_range range;
u64 minlen = ULLONG_MAX;
u64 num_devices = 0;
+   u64 total_bytes = btrfs_super_total_bytes(root-fs_info-super_copy);
int ret;
 
if (!capable(CAP_SYS_ADMIN))
@@ -295,12 +296,15 @@ static noinline int btrfs_ioctl_fitrim(struct file *file, 
void __user *arg)
}
}
rcu_read_unlock();
+
if (!num_devices)
return -EOPNOTSUPP;
-
if (copy_from_user(range, arg, sizeof(range)))
return -EFAULT;
+   if (range.start  total_bytes)
+   return -EINVAL;
 
+   range.len = min(range.len, total_bytes - range.start);
range.minlen = max(range.minlen, minlen);
ret = btrfs_trim_fs(root, range);
if (ret  0)
-- 
1.7.4.4

--
To unsubscribe from this list: send the line unsubscribe linux-btrfs in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 5/5] btrfs: use fs netlink interface for ENOSPC conditions

2011-08-18 Thread Lukas Czerner
Register fs netlink interface and send proper warning if ENOSPC is
encountered. Note that we differentiate between enospc for metadata and
enospc for data.

Signed-off-by: Lukas Czerner lczer...@redhat.com
Cc: Chris Mason chris.ma...@oracle.com
Cc: linux-btrfs@vger.kernel.org
---
 fs/btrfs/extent-tree.c |   13 ++---
 fs/btrfs/super.c   |1 +
 2 files changed, 11 insertions(+), 3 deletions(-)

diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 66bac22..a47d9b9 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -3029,13 +3029,14 @@ int btrfs_check_data_free_space(struct inode *inode, 
u64 bytes)
 {
struct btrfs_space_info *data_sinfo;
struct btrfs_root *root = BTRFS_I(inode)-root;
+   struct btrfs_fs_info *fs_info = root-fs_info;
u64 used;
int ret = 0, committed = 0, alloc_chunk = 1;
 
/* make sure bytes are sectorsize aligned */
bytes = (bytes + root-sectorsize - 1)  ~((u64)root-sectorsize - 1);
 
-   if (root == root-fs_info-tree_root ||
+   if (root == fs_info-tree_root ||
BTRFS_I(inode)-location.objectid == BTRFS_FREE_INO_OBJECTID) {
alloc_chunk = 0;
committed = 1;
@@ -3070,7 +3071,7 @@ alloc:
if (IS_ERR(trans))
return PTR_ERR(trans);
 
-   ret = do_chunk_alloc(trans, root-fs_info-extent_root,
+   ret = do_chunk_alloc(trans, fs_info-extent_root,
 bytes + 2 * 1024 * 1024,
 alloc_target,
 CHUNK_ALLOC_NO_FORCE);
@@ -3100,7 +3101,7 @@ alloc:
/* commit the current transaction and try again */
 commit_trans:
if (!committed 
-   !atomic_read(root-fs_info-open_ioctl_trans)) {
+   !atomic_read(fs_info-open_ioctl_trans)) {
committed = 1;
trans = btrfs_join_transaction(root);
if (IS_ERR(trans))
@@ -3111,6 +3112,8 @@ commit_trans:
goto again;
}
 
+   fs_nl_send_warning(fs_info-fs_devices-latest_bdev-bd_dev,
+  FS_NL_ENOSPC_WARN);
return -ENOSPC;
}
data_sinfo-bytes_may_use += bytes;
@@ -3522,6 +3525,10 @@ again:
}
 
 out:
+   if (unlikely(-ENOSPC == ret)) {
+   dev_t bdev = root-fs_info-fs_devices-latest_bdev-bd_dev;
+   fs_nl_send_warning(bdev, FS_NL_META_ENOSPC_WARN);
+   }
if (flushing) {
spin_lock(space_info-lock);
space_info-flush = 0;
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index 15634d4..8ac9e01 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -1266,6 +1266,7 @@ static int __init init_btrfs_fs(void)
if (err)
goto unregister_ioctl;
 
+   init_fs_nl_family();
printk(KERN_INFO %s loaded\n, BTRFS_BUILD_VERSION);
return 0;
 
-- 
1.7.4.4

--
To unsubscribe from this list: send the line unsubscribe linux-btrfs in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 5/5] btrfs: use fs netlink interface for ENOSPC conditions

2011-08-18 Thread Lukas Czerner
On Thu, 18 Aug 2011, David Sterba wrote:

 Hi,
 
 I see you are mixing a cleanup while adding a new feature.
 
 On Thu, Aug 18, 2011 at 02:18:26PM +0200, Lukas Czerner wrote:
  Register fs netlink interface and send proper warning if ENOSPC is
  encountered. Note that we differentiate between enospc for metadata and
  enospc for data.
  
  Signed-off-by: Lukas Czerner lczer...@redhat.com
  Cc: Chris Mason chris.ma...@oracle.com
  Cc: linux-btrfs@vger.kernel.org
  ---
   fs/btrfs/extent-tree.c |   13 ++---
   fs/btrfs/super.c   |1 +
   2 files changed, 11 insertions(+), 3 deletions(-)
  
  diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
  index 66bac22..a47d9b9 100644
  --- a/fs/btrfs/extent-tree.c
  +++ b/fs/btrfs/extent-tree.c
  @@ -3029,13 +3029,14 @@ int btrfs_check_data_free_space(struct inode 
  *inode, u64 bytes)
 
 hm, if diff is correct with the context, you are changing function
 btrfs_check_data_free_space and reporting it as metadata ENOSPC later.
 Although it's called for metadata (or I should say non-user file blocks,
 like space cache, ino cache) block allocation, it's called from
 btrfs_fallocate too. Seems that there has to be additional flag to say
 if it's really metadata or data.

Actually diff got the context wrong. Sometimes it has some difficulties
if there are labels involved.

 
   {
  struct btrfs_space_info *data_sinfo;
  struct btrfs_root *root = BTRFS_I(inode)-root;
  +   struct btrfs_fs_info *fs_info = root-fs_info;
 
 change unrealated to ENOSPC-netlink

Not really. I am using fs_info to get reference to the last_bdev and
it is useful for other places as well.

 
  u64 used;
  int ret = 0, committed = 0, alloc_chunk = 1;
   
  /* make sure bytes are sectorsize aligned */
  bytes = (bytes + root-sectorsize - 1)  ~((u64)root-sectorsize - 1);
   
  -   if (root == root-fs_info-tree_root ||
  +   if (root == fs_info-tree_root ||
 
 here
 
  BTRFS_I(inode)-location.objectid == BTRFS_FREE_INO_OBJECTID) {
  alloc_chunk = 0;
  committed = 1;
  @@ -3070,7 +3071,7 @@ alloc:
  if (IS_ERR(trans))
  return PTR_ERR(trans);
   
  -   ret = do_chunk_alloc(trans, root-fs_info-extent_root,
  +   ret = do_chunk_alloc(trans, fs_info-extent_root,
 
 here
 
   bytes + 2 * 1024 * 1024,
   alloc_target,
   CHUNK_ALLOC_NO_FORCE);
  @@ -3100,7 +3101,7 @@ alloc:
  /* commit the current transaction and try again */
   commit_trans:
  if (!committed 
  -   !atomic_read(root-fs_info-open_ioctl_trans)) {
  +   !atomic_read(fs_info-open_ioctl_trans)) {
 
 here
 
  committed = 1;
  trans = btrfs_join_transaction(root);
  if (IS_ERR(trans))
  @@ -3111,6 +3112,8 @@ commit_trans:
  goto again;
  }
   
  +   fs_nl_send_warning(fs_info-fs_devices-latest_bdev-bd_dev,
  +  FS_NL_ENOSPC_WARN);
 
 or is it due to this line being too long with root- ? :)

exactly :) But as I said fs_info is referenced on other paces as well so
it is useful anyway.

 
  return -ENOSPC;
  }
  data_sinfo-bytes_may_use += bytes;
  @@ -3522,6 +3525,10 @@ again:
  }
   
   out:
  +   if (unlikely(-ENOSPC == ret)) {
 
 'unlikely' is not needed here, it does not bring anything compiler
 wouldn't know, static branch prediction will give low probabiliy to this
 check anyway

Ok, but I would like to know why do you think that. It is not only in
the error path and there are actually several paths to this condition so
maybe I am being dense, could you explain it a bit for me ?

Thanks!
-Lukas

 
  +   dev_t bdev = root-fs_info-fs_devices-latest_bdev-bd_dev;
  +   fs_nl_send_warning(bdev, FS_NL_META_ENOSPC_WARN);
  +   }
  if (flushing) {
  spin_lock(space_info-lock);
  space_info-flush = 0;
  diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
  index 15634d4..8ac9e01 100644
  --- a/fs/btrfs/super.c
  +++ b/fs/btrfs/super.c
  @@ -1266,6 +1266,7 @@ static int __init init_btrfs_fs(void)
  if (err)
  goto unregister_ioctl;
   
  +   init_fs_nl_family();
  printk(KERN_INFO %s loaded\n, BTRFS_BUILD_VERSION);
  return 0;
 
 
 david
 

-- 
--
To unsubscribe from this list: send the line unsubscribe linux-btrfs in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 2/8] ext4: remove i_alloc_sem abuse

2011-06-21 Thread Lukas Czerner
On Mon, 20 Jun 2011, Christoph Hellwig wrote:

 Add a new rw_semaphore to protect page_mkwrite against truncate.  Previous
 i_alloc_sem was abused for this, but it's going away in this series.
 
 Signed-off-by: Christoph Hellwig h...@lst.de
 
 Index: linux-2.6/fs/ext4/inode.c
 ===
 --- linux-2.6.orig/fs/ext4/inode.c2011-06-20 14:25:33.779248128 +0200
 +++ linux-2.6/fs/ext4/inode.c 2011-06-20 14:27:53.025907745 +0200
 @@ -5357,6 +5357,7 @@ int ext4_setattr(struct dentry *dentry,
   if (attr-ia_size  sbi-s_bitmap_maxbytes)
   return -EFBIG;
   }
 + down_write((EXT4_I(inode)-truncate_lock));
   }
  
   if (S_ISREG(inode-i_mode) 
 @@ -5405,6 +5406,9 @@ int ext4_setattr(struct dentry *dentry,
   ext4_truncate(inode);
   }
  
 + if (attr-ia_valid  ATTR_SIZE)
 + up_write((EXT4_I(inode)-truncate_lock));
 +

Hi Christoph,

this looks like a bug fix, so we should rather do it in a separate
commit. Could you send it separately, or at least mention that in commit
description ?

Otherwise the patch looks good.

Thanks!
-Lukas

   if (!rc) {
   setattr_copy(inode, attr);
   mark_inode_dirty(inode);
 @@ -5850,10 +5854,10 @@ int ext4_page_mkwrite(struct vm_area_str
   struct address_space *mapping = inode-i_mapping;
  
   /*
 -  * Get i_alloc_sem to stop truncates messing with the inode. We cannot
 -  * get i_mutex because we are already holding mmap_sem.
 +  * Get truncate_lock to stop truncates messing with the inode. We
 +  * cannot get i_mutex because we are already holding mmap_sem.
*/
 - down_read(inode-i_alloc_sem);
 + down_read((EXT4_I(inode)-truncate_lock));
   size = i_size_read(inode);
   if (page-mapping != mapping || size = page_offset(page)
   || !PageUptodate(page)) {
 @@ -5865,7 +5869,7 @@ int ext4_page_mkwrite(struct vm_area_str
   lock_page(page);
   wait_on_page_writeback(page);
   if (PageMappedToDisk(page)) {
 - up_read(inode-i_alloc_sem);
 + up_read((EXT4_I(inode)-truncate_lock));
   return VM_FAULT_LOCKED;
   }
  
 @@ -5883,7 +5887,7 @@ int ext4_page_mkwrite(struct vm_area_str
   if (page_has_buffers(page)) {
   if (!walk_page_buffers(NULL, page_buffers(page), 0, len, NULL,
   ext4_bh_unmapped)) {
 - up_read(inode-i_alloc_sem);
 + up_read((EXT4_I(inode)-truncate_lock));
   return VM_FAULT_LOCKED;
   }
   }
 @@ -5912,11 +5916,11 @@ int ext4_page_mkwrite(struct vm_area_str
*/
   lock_page(page);
   wait_on_page_writeback(page);
 - up_read(inode-i_alloc_sem);
 + up_read((EXT4_I(inode)-truncate_lock));
   return VM_FAULT_LOCKED;
  out_unlock:
   if (ret)
   ret = VM_FAULT_SIGBUS;
 - up_read(inode-i_alloc_sem);
 + up_read((EXT4_I(inode)-truncate_lock));
   return ret;
  }
 Index: linux-2.6/fs/ext4/super.c
 ===
 --- linux-2.6.orig/fs/ext4/super.c2011-06-20 14:25:33.795914793 +0200
 +++ linux-2.6/fs/ext4/super.c 2011-06-20 14:27:06.269243443 +0200
 @@ -871,6 +871,7 @@ static struct inode *ext4_alloc_inode(st
   ei-i_datasync_tid = 0;
   atomic_set(ei-i_ioend_count, 0);
   atomic_set(ei-i_aiodio_unwritten, 0);
 + init_rwsem(ei-truncate_lock);
  
   return ei-vfs_inode;
  }
 Index: linux-2.6/fs/ext4/ext4.h
 ===
 --- linux-2.6.orig/fs/ext4/ext4.h 2011-06-20 14:25:33.752581461 +0200
 +++ linux-2.6/fs/ext4/ext4.h  2011-06-20 14:27:06.272576777 +0200
 @@ -844,6 +844,9 @@ struct ext4_inode_info {
   /* on-disk additional length */
   __u16 i_extra_isize;
  
 + /* protect page_mkwrite against truncates */
 + struct rw_semaphore truncate_lock;
 +
  #ifdef CONFIG_QUOTA
   /* quota space reservation, managed internally by quota code */
   qsize_t i_reserved_quota;
 
 --
 To unsubscribe from this list: send the line unsubscribe linux-ext4 in
 the body of a message to majord...@vger.kernel.org
 More majordomo info at  http://vger.kernel.org/majordomo-info.html
 

-- 
--
To unsubscribe from this list: send the line unsubscribe linux-btrfs in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 2/8] ext4: remove i_alloc_sem abuse

2011-06-21 Thread Lukas Czerner
On Tue, 21 Jun 2011, Lukas Czerner wrote:

 On Mon, 20 Jun 2011, Christoph Hellwig wrote:
 
  Add a new rw_semaphore to protect page_mkwrite against truncate.  Previous
  i_alloc_sem was abused for this, but it's going away in this series.
  
  Signed-off-by: Christoph Hellwig h...@lst.de
  
  Index: linux-2.6/fs/ext4/inode.c
  ===
  --- linux-2.6.orig/fs/ext4/inode.c  2011-06-20 14:25:33.779248128 +0200
  +++ linux-2.6/fs/ext4/inode.c   2011-06-20 14:27:53.025907745 +0200
  @@ -5357,6 +5357,7 @@ int ext4_setattr(struct dentry *dentry,
  if (attr-ia_size  sbi-s_bitmap_maxbytes)
  return -EFBIG;
  }
  +   down_write((EXT4_I(inode)-truncate_lock));
  }
   
  if (S_ISREG(inode-i_mode) 
  @@ -5405,6 +5406,9 @@ int ext4_setattr(struct dentry *dentry,
  ext4_truncate(inode);
  }
   
  +   if (attr-ia_valid  ATTR_SIZE)
  +   up_write((EXT4_I(inode)-truncate_lock));
  +
 
 Hi Christoph,
 
 this looks like a bug fix, so we should rather do it in a separate
 commit. Could you send it separately, or at least mention that in commit
 description ?

That's stupid note, it is not a bugfix of course. Ignore it, sorry for
the noise.

 
 Otherwise the patch looks good.
 
 Thanks!
 -Lukas
 
  if (!rc) {
  setattr_copy(inode, attr);
  mark_inode_dirty(inode);
  @@ -5850,10 +5854,10 @@ int ext4_page_mkwrite(struct vm_area_str
  struct address_space *mapping = inode-i_mapping;
   
  /*
  -* Get i_alloc_sem to stop truncates messing with the inode. We cannot
  -* get i_mutex because we are already holding mmap_sem.
  +* Get truncate_lock to stop truncates messing with the inode. We
  +* cannot get i_mutex because we are already holding mmap_sem.
   */
  -   down_read(inode-i_alloc_sem);
  +   down_read((EXT4_I(inode)-truncate_lock));
  size = i_size_read(inode);
  if (page-mapping != mapping || size = page_offset(page)
  || !PageUptodate(page)) {
  @@ -5865,7 +5869,7 @@ int ext4_page_mkwrite(struct vm_area_str
  lock_page(page);
  wait_on_page_writeback(page);
  if (PageMappedToDisk(page)) {
  -   up_read(inode-i_alloc_sem);
  +   up_read((EXT4_I(inode)-truncate_lock));
  return VM_FAULT_LOCKED;
  }
   
  @@ -5883,7 +5887,7 @@ int ext4_page_mkwrite(struct vm_area_str
  if (page_has_buffers(page)) {
  if (!walk_page_buffers(NULL, page_buffers(page), 0, len, NULL,
  ext4_bh_unmapped)) {
  -   up_read(inode-i_alloc_sem);
  +   up_read((EXT4_I(inode)-truncate_lock));
  return VM_FAULT_LOCKED;
  }
  }
  @@ -5912,11 +5916,11 @@ int ext4_page_mkwrite(struct vm_area_str
   */
  lock_page(page);
  wait_on_page_writeback(page);
  -   up_read(inode-i_alloc_sem);
  +   up_read((EXT4_I(inode)-truncate_lock));
  return VM_FAULT_LOCKED;
   out_unlock:
  if (ret)
  ret = VM_FAULT_SIGBUS;
  -   up_read(inode-i_alloc_sem);
  +   up_read((EXT4_I(inode)-truncate_lock));
  return ret;
   }
  Index: linux-2.6/fs/ext4/super.c
  ===
  --- linux-2.6.orig/fs/ext4/super.c  2011-06-20 14:25:33.795914793 +0200
  +++ linux-2.6/fs/ext4/super.c   2011-06-20 14:27:06.269243443 +0200
  @@ -871,6 +871,7 @@ static struct inode *ext4_alloc_inode(st
  ei-i_datasync_tid = 0;
  atomic_set(ei-i_ioend_count, 0);
  atomic_set(ei-i_aiodio_unwritten, 0);
  +   init_rwsem(ei-truncate_lock);
   
  return ei-vfs_inode;
   }
  Index: linux-2.6/fs/ext4/ext4.h
  ===
  --- linux-2.6.orig/fs/ext4/ext4.h   2011-06-20 14:25:33.752581461 +0200
  +++ linux-2.6/fs/ext4/ext4.h2011-06-20 14:27:06.272576777 +0200
  @@ -844,6 +844,9 @@ struct ext4_inode_info {
  /* on-disk additional length */
  __u16 i_extra_isize;
   
  +   /* protect page_mkwrite against truncates */
  +   struct rw_semaphore truncate_lock;
  +
   #ifdef CONFIG_QUOTA
  /* quota space reservation, managed internally by quota code */
  qsize_t i_reserved_quota;
  
  --
  To unsubscribe from this list: send the line unsubscribe linux-ext4 in
  the body of a message to majord...@vger.kernel.org
  More majordomo info at  http://vger.kernel.org/majordomo-info.html
  
 
 

-- 
--
To unsubscribe from this list: send the line unsubscribe linux-btrfs in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html