[PATCH v2 1/2] Btrfs: cleanup of error processing in btree_get_extent()

2012-09-13 Thread Tsutomu Itoh
This patch simplifies the somewhat complex error processing in
btree_get_extent().

Signed-off-by: Tsutomu Itoh 
---
 fs/btrfs/disk-io.c |   14 +-
 1 files changed, 5 insertions(+), 9 deletions(-)

diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 29c69e6..27d0ebe 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -222,21 +222,17 @@ static struct extent_map *btree_get_extent(struct inode 
*inode,
 
free_extent_map(em);
em = lookup_extent_mapping(em_tree, start, len);
-   if (em) {
-   ret = 0;
-   } else {
-   em = lookup_extent_mapping(em_tree, failed_start,
-  failed_len);
-   ret = -EIO;
+   if (!em) {
+   lookup_extent_mapping(em_tree, failed_start,
+ failed_len);
+   em = ERR_PTR(-EIO);
}
} else if (ret) {
free_extent_map(em);
-   em = NULL;
+   em = ERR_PTR(ret);
}
write_unlock(&em_tree->lock);
 
-   if (ret)
-   em = ERR_PTR(ret);
 out:
return em;
 }
-- 
1.7.6

--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v2 2/2] Btrfs: remove unnecessary code in btree_get_extent()

2012-09-13 Thread Tsutomu Itoh
Unnecessary lookup_extent_mapping() is removed because an error is
returned to the caller.
This patch was made based on the advice from Stefan Behrens, thanks.

Signed-off-by: Tsutomu Itoh 
---
 fs/btrfs/disk-io.c |8 +---
 1 files changed, 1 insertions(+), 7 deletions(-)

diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 27d0ebe..8b7dce9 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -217,16 +217,10 @@ static struct extent_map *btree_get_extent(struct inode 
*inode,
write_lock(&em_tree->lock);
ret = add_extent_mapping(em_tree, em);
if (ret == -EEXIST) {
-   u64 failed_start = em->start;
-   u64 failed_len = em->len;
-
free_extent_map(em);
em = lookup_extent_mapping(em_tree, start, len);
-   if (!em) {
-   lookup_extent_mapping(em_tree, failed_start,
- failed_len);
+   if (!em)
em = ERR_PTR(-EIO);
-   }
} else if (ret) {
free_extent_map(em);
em = ERR_PTR(ret);
-- 
1.7.6

--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 2/2] Revert "Btrfs: do not do filemap_write_and_wait_range in fsync"

2012-09-13 Thread Miao Xie
This reverts commit 0885ef5b5601e9b007c383e77c172769b1f214fd

After applying the above patch, the performance slowed down because the dirty
page flush can only be done by one task, so revert it.

The following is the test result of sysbench:
Before  After
24MB/s  39MB/s

Signed-off-by: Miao Xie 
---
 fs/btrfs/file.c |   14 +++---
 1 files changed, 11 insertions(+), 3 deletions(-)

diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 1a5f76b..1ea5c39 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -1513,12 +1513,20 @@ int btrfs_sync_file(struct file *file, loff_t start, 
loff_t end, int datasync)
 
trace_btrfs_sync_file(file, datasync);
 
+   /*
+* We write the dirty pages in the range and wait until they complete
+* out of the ->i_mutex. If so, we can flush the dirty pages by
+* multi-task, and make the performance up.
+*/
+   ret = filemap_write_and_wait_range(inode->i_mapping, start, end);
+   if (ret)
+   return ret;
+
mutex_lock(&inode->i_mutex);
 
/*
-* we wait first, since the writeback may change the inode, also wait
-* ordered range does a filemape_write_and_wait_range which is why we
-* don't do it above like other file systems.
+* We flush the dirty pages again to avoid some dirty pages in the
+* range being left.
 */
atomic_inc(&root->log_batch);
btrfs_wait_ordered_range(inode, start, end);
-- 
1.7.6.5
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 1/2] Btrfs: cleanup duplicated division functions

2012-09-13 Thread Miao Xie
div_factor{_fine} has been implemented twice, so clean it up.
Also move them into an independent file named math.h because they are
common math functions.

Signed-off-by: Miao Xie 
---
 fs/btrfs/extent-tree.c |   19 +--
 fs/btrfs/math.h|   44 
 fs/btrfs/volumes.c |   23 +--
 3 files changed, 46 insertions(+), 40 deletions(-)
 create mode 100644 fs/btrfs/math.h

diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index a010234..5f6bae1 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -33,6 +33,7 @@
 #include "volumes.h"
 #include "locking.h"
 #include "free-space-cache.h"
+#include "math.h"
 
 #undef SCRAMBLE_DELAYED_REFS
 
@@ -648,24 +649,6 @@ void btrfs_clear_space_info_full(struct btrfs_fs_info 
*info)
rcu_read_unlock();
 }
 
-static u64 div_factor(u64 num, int factor)
-{
-   if (factor == 10)
-   return num;
-   num *= factor;
-   do_div(num, 10);
-   return num;
-}
-
-static u64 div_factor_fine(u64 num, int factor)
-{
-   if (factor == 100)
-   return num;
-   num *= factor;
-   do_div(num, 100);
-   return num;
-}
-
 u64 btrfs_find_block_group(struct btrfs_root *root,
   u64 search_start, u64 search_hint, int owner)
 {
diff --git a/fs/btrfs/math.h b/fs/btrfs/math.h
new file mode 100644
index 000..b7816ce
--- /dev/null
+++ b/fs/btrfs/math.h
@@ -0,0 +1,44 @@
+
+/*
+ * Copyright (C) 2012 Fujitsu.  All rights reserved.
+ * Written by Miao Xie 
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 021110-1307, USA.
+ */
+
+#ifndef __BTRFS_MATH_H
+#define __BTRFS_MATH_H
+
+#include <asm/div64.h>
+
+static inline u64 div_factor(u64 num, int factor)
+{
+   if (factor == 10)
+   return num;
+   num *= factor;
+   do_div(num, 10);
+   return num;
+}
+
+static inline u64 div_factor_fine(u64 num, int factor)
+{
+   if (factor == 100)
+   return num;
+   num *= factor;
+   do_div(num, 100);
+   return num;
+}
+
+#endif
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 3f4e70e..2558fc0 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -25,7 +25,6 @@
 #include 
 #include 
 #include 
-#include <asm/div64.h>
 #include "compat.h"
 #include "ctree.h"
 #include "extent_map.h"
@@ -36,6 +35,7 @@
 #include "async-thread.h"
 #include "check-integrity.h"
 #include "rcu-string.h"
+#include "math.h"
 
 static int init_first_rw_device(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
@@ -2325,18 +2325,6 @@ static int chunk_profiles_filter(u64 chunk_type,
return 1;
 }
 
-static u64 div_factor_fine(u64 num, int factor)
-{
-   if (factor <= 0)
-   return 0;
-   if (factor >= 100)
-   return num;
-
-   num *= factor;
-   do_div(num, 100);
-   return num;
-}
-
 static int chunk_usage_filter(struct btrfs_fs_info *fs_info, u64 chunk_offset,
  struct btrfs_balance_args *bargs)
 {
@@ -2501,15 +2489,6 @@ static int should_balance_chunk(struct btrfs_root *root,
return 1;
 }
 
-static u64 div_factor(u64 num, int factor)
-{
-   if (factor == 10)
-   return num;
-   num *= factor;
-   do_div(num, 10);
-   return num;
-}
-
 static int __btrfs_balance(struct btrfs_fs_info *fs_info)
 {
struct btrfs_balance_control *bctl = fs_info->balance_ctl;
-- 
1.7.6.5
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] Btrfs: delay block group item insertion

2012-09-13 Thread Josef Bacik
On Wed, Sep 12, 2012 at 09:35:55PM -0600, Miao Xie wrote:
> On Wed, 12 Sep 2012 14:04:13 -0400, Josef Bacik wrote:
> > So we have lots of places where we try to preallocate chunks in order to
> > make sure we have enough space as we make our allocations.  This has
> > historically meant that we're constantly tweaking when we should allocate a
> > new chunk, and historically we have gotten this horribly wrong so we way
> > over allocate either metadata or data.  To try and keep this from happening
> > we are going to make it so that the block group item insertion is done out
> > of band at the end of a transaction.  This will allow us to create chunks
> > even if we are trying to make an allocation for the extent tree.  With this
> > patch my enospc tests run faster (didn't expect this) and more efficiently
> > use the disk space (this is what I wanted).  Thanks,
> 
> This patch also fixes a deadlock problem that happened when we add two or
> more small devices(< 4G) into a seed fs(the profile of metadata is RAID1),
> and a enospc problem when we add a small device (< 256M) into a big empty
> seed fs(> 60G).
> 
> (My fix patch which is similar to this one is on the way, I'm a bit slow :) )
> 
> > @@ -1400,6 +1407,9 @@ int btrfs_commit_transaction(struct 
> > btrfs_trans_handle *trans,
> >  */
> > cur_trans->delayed_refs.flushing = 1;
> >  
> > +   if (!list_empty(&trans->new_bgs))
> > +   btrfs_create_pending_block_groups(trans, root);
> > +
> > ret = btrfs_run_delayed_refs(trans, root, 0);
> > if (ret)
> > goto cleanup_transaction;
> 
> I think we can not make sure we won't allocate new chunks when we
> create the pending snapshots and write out the space cache and inode
> cache, so we should check ->new_bgs and call 
> btrfs_create_pending_block_groups()
> when committing the cowonly tree roots.
> 
> And beside that, We'd better add a BUG_ON() after we update the root tree to
> make sure there is no pending block group item left in the list.
> 

We're also running this in run_delayed_refs when we want to run all delayed refs
so we should be pretty safe, but a BUG_ON() would definitely make sure we are.
Thanks,

Josef
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] Btrfs: delay block group item insertion

2012-09-13 Thread Josef Bacik
On Wed, Sep 12, 2012 at 07:32:05PM -0600, Liu Bo wrote:
> On Wed, Sep 12, 2012 at 02:04:13PM -0400, Josef Bacik wrote:
> > So we have lots of places where we try to preallocate chunks in order to
> > make sure we have enough space as we make our allocations.  This has
> > historically meant that we're constantly tweaking when we should allocate a
> > new chunk, and historically we have gotten this horribly wrong so we way
> > over allocate either metadata or data.  To try and keep this from happening
> > we are going to make it so that the block group item insertion is done out
> > of band at the end of a transaction.  This will allow us to create chunks
> > even if we are trying to make an allocation for the extent tree.  With this
> > patch my enospc tests run faster (didn't expect this) and more efficiently
> > use the disk space (this is what I wanted).  Thanks,
> >
> 
> I'm afraid this does not perform well enough in the normal case, here is the
> compilebench test:
> 
> # cat btrfs-makej/result-4k
> intial create total runs 30 avg 51.99 MB/s (user 0.50s sys 0.85s)
> compile total runs 30 avg 98.45 MB/s (user 0.12s sys 0.38s)
> read compiled tree total runs 3 avg 19.89 MB/s (user 1.55s sys 3.07s)
> delete compiled tree total runs 30 avg 12.15 seconds (user 0.66s sys 2.15s)
> 
> # cat btrfs-josef-makej/result
> intial create total runs 30 avg 49.79 MB/s (user 0.52s sys 0.87s)
> compile total runs 30 avg 70.01 MB/s (user 0.14s sys 0.44s)
> read compiled tree total runs 3 avg 18.46 MB/s (user 1.57s sys 3.16s)
> delete compiled tree total runs 30 avg 13.88 seconds (user 0.67s sys 2.18s)
> 
> And the blktrace shows that it makes us do more seeks in create and
> compile section.
> 
> The patch overall looks clean and good though.
> 

Yeah mostly what I'm looking for is more efficient metadata chunk allocation,
and I expected it to have a performance impact somewhere (I just never hit one in
my ENOSPC tests).  This drop is within reason for me and I'm willing to eat a
slight performance drop to not allocate an entire disk with metadata ;).
Thanks,

Josef
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 2/2] Revert "Btrfs: do not do filemap_write_and_wait_range in fsync"

2012-09-13 Thread Josef Bacik
On Thu, Sep 13, 2012 at 04:53:47AM -0600, Miao Xie wrote:
> This reverts commit 0885ef5b5601e9b007c383e77c172769b1f214fd
> 
> After applying the above patch, the performance slowed down because the dirty
> page flush can only be done by one task, so revert it.
> 
> The following is the test result of sysbench:
>   Before  After
>   24MB/s  39MB/s
> 

Ah I didn't think about that, good point, thanks!

Josef
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: "Task blocked for more than 120 seconds" on file copy

2012-09-13 Thread Josef Bacik
On Wed, Sep 12, 2012 at 03:16:23PM -0600, Roman Mamedov wrote:
> Hello,
> 
> I was copying a 60 GB file onto btrfs, and at the same time tried to 
> stop/restart Squid,
> which has its cache on the same partition. Squid locked up until the copying 
> was finished.
> This filesystem was resized by about +100% just before, if this could matter.
> The kernel is 3.6.0-rc5. From dmesg:
> 

When you get these sort of lockups we need sysrq+w to get all of the blocked
tasks, not just the ones that the hung task timer catches.  Thanks,

Josef
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: ssd alignment and btrfs sector size

2012-09-13 Thread Josef Bacik
On Thu, Sep 13, 2012 at 12:02:04AM -0600, ching lu wrote:
> I am trying to test btrfs on my ssd, i am studying about btrfs and alignment.
> 
> I have read this old mail:
> http://www.mail-archive.com/linux-btrfs@vger.kernel.org/msg16177.html
> 
> According to the thread, 3 parameters should be tuned
> 
> 1. leafsize
> 2. nodesize
> 3. sectorsize
> 
> The first two parameters are easy to understand, but how does
> "sectorsize" affect the alignment?
> 
> If the erase block size of my SSD is 16k, and i set
> leafsize=nodesize=16k, leave the sectorsize=4k (default), will it
> causes misalignment?

So first leafsize/nodesize will always be the same, so if you set one to 16k
mkfs will automatically set the other to 16k, it's just way back when we had
grand plans of having different sizes for both.

Secondly your erase block size is 16k?  What kind of drive are you using?  But
yeah 4k sectorsize will result in misaligned writes to the data area, but your
metadata will be aligned.  Hopefully the raid5/6 code will be out soon and we'll
be able to do > page size sectorsize and you'll be able to set
leafsize==sectorsize.  Btw most normal SSDs have way larger erase blocks, on
the order of several MB, so it's not the worst thing to write to the middle of
erase blocks, everybody does it anyway.  Thanks,

Josef
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v3] Btrfs: fix a bug of per-file nocow

2012-09-13 Thread David Sterba
On Tue, Sep 11, 2012 at 10:33:50PM +0800, Liu Bo wrote:
> Users report a bug, the reproducer is:
> $ mkfs.btrfs /dev/loop0
> $ mount /dev/loop0 /mnt/btrfs/
> $ mkdir /mnt/btrfs/dir
> $ chattr +C /mnt/btrfs/dir/
> $ dd if=/dev/zero of=/mnt/btrfs/dir/foo bs=4K count=10;
> $ lsattr /mnt/btrfs/dir/foo
> ---C- /mnt/btrfs/dir/foo
> $ filefrag /mnt/btrfs/dir/foo
> /mnt/btrfs/dir/foo: 1 extent found---> an extent
> $ dd if=/dev/zero of=/mnt/btrfs/dir/foo bs=4K count=1 seek=5 
> conv=notrunc,nocreat; sync
> $ filefrag /mnt/btrfs/dir/foo
> /mnt/btrfs/dir/foo: 3 extents found   ---> with nocow, btrfs breaks the 
> extent into three parts
> 
> The new created file should not only inherit the NODATACOW flag, but also
> honor NODATASUM flag, because we must do COW on a file extent with checksum.
> 
> Signed-off-by: Liu Bo 

Reviewed-by: David Sterba 
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


do_sync_write and do_sync_read

2012-09-13 Thread Sonu


--

const struct file_operations btrfs_file_operations = {
::

    .read   = do_sync_read,
    .write  = do_sync_write,
--


trying to locate do_sync_read (write) any idea where are they ?

Thanks

--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: do_sync_write and do_sync_read

2012-09-13 Thread David Sterba
On Fri, Sep 14, 2012 at 12:08:35AM +0800, Sonu wrote:
> trying to locate do_sync_read (write) any idea where are they ?

fs/read_write.c
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH] Btrfs: delay block group item insertion V2

2012-09-13 Thread Josef Bacik
So we have lots of places where we try to preallocate chunks in order to
make sure we have enough space as we make our allocations.  This has
historically meant that we're constantly tweaking when we should allocate a
new chunk, and historically we have gotten this horribly wrong so we way
over allocate either metadata or data.  To try and keep this from happening
we are going to make it so that the block group item insertion is done out
of band at the end of a transaction.  This will allow us to create chunks
even if we are trying to make an allocation for the extent tree.  With this
patch my enospc tests run faster (didn't expect this) and more efficiently
use the disk space (this is what I wanted).  Thanks,

Signed-off-by: Josef Bacik 
---
V1->V2: fix deadlock when inserting block groups from run_delayed_refs

 fs/btrfs/ctree.h   |5 ++
 fs/btrfs/extent-tree.c |  127 +++-
 fs/btrfs/transaction.c |   10 
 fs/btrfs/transaction.h |1 +
 4 files changed, 76 insertions(+), 67 deletions(-)

diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 305002b..d66dc1c 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -1137,6 +1137,9 @@ struct btrfs_block_group_cache {
 * Today it will only have one thing on it, but that may change
 */
struct list_head cluster_list;
+
+   /* For delayed block group creation */
+   struct list_head new_bg_list;
 };
 
 /* delayed seq elem */
@@ -2865,6 +2868,8 @@ int btrfs_make_block_group(struct btrfs_trans_handle 
*trans,
   u64 size);
 int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
 struct btrfs_root *root, u64 group_start);
+void btrfs_create_pending_block_groups(struct btrfs_trans_handle *trans,
+  struct btrfs_root *root);
 u64 btrfs_reduce_alloc_profile(struct btrfs_root *root, u64 flags);
 u64 btrfs_get_alloc_profile(struct btrfs_root *root, int data);
 void btrfs_clear_space_info_full(struct btrfs_fs_info *info);
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index a8de1c3..124f3a0 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -2361,10 +2361,6 @@ static noinline int run_clustered_refs(struct 
btrfs_trans_handle *trans,
}
 
 next:
-   do_chunk_alloc(trans, fs_info->extent_root,
-  2 * 1024 * 1024,
-  btrfs_get_alloc_profile(root, 0),
-  CHUNK_ALLOC_NO_FORCE);
cond_resched();
spin_lock(&delayed_refs->lock);
}
@@ -2478,10 +2474,6 @@ int btrfs_run_delayed_refs(struct btrfs_trans_handle 
*trans,
if (root == root->fs_info->extent_root)
root = root->fs_info->tree_root;
 
-   do_chunk_alloc(trans, root->fs_info->extent_root,
-  2 * 1024 * 1024, btrfs_get_alloc_profile(root, 0),
-  CHUNK_ALLOC_NO_FORCE);
-
btrfs_delayed_refs_qgroup_accounting(trans, root->fs_info);
 
delayed_refs = &trans->transaction->delayed_refs;
@@ -2551,6 +2543,12 @@ again:
}
 
if (run_all) {
+   if (!list_empty(&trans->new_bgs)) {
+   spin_unlock(&delayed_refs->lock);
+   btrfs_create_pending_block_groups(trans, root);
+   spin_lock(&delayed_refs->lock);
+   }
+
node = rb_first(&delayed_refs->root);
if (!node)
goto out;
@@ -3826,7 +3824,8 @@ enum flush_state {
FLUSH_DELALLOC_WAIT =   2,
FLUSH_DELAYED_ITEMS_NR  =   3,
FLUSH_DELAYED_ITEMS =   4,
-   COMMIT_TRANS=   5,
+   ALLOC_CHUNK =   5,
+   COMMIT_TRANS=   6,
 };
 
 static int flush_space(struct btrfs_root *root,
@@ -3863,6 +3862,20 @@ static int flush_space(struct btrfs_root *root,
ret = btrfs_run_delayed_items_nr(trans, root, nr);
btrfs_end_transaction(trans, root);
break;
+   case ALLOC_CHUNK:
+   trans = btrfs_join_transaction(root);
+   if (IS_ERR(trans)) {
+   ret = PTR_ERR(trans);
+   break;
+   }
+   ret = do_chunk_alloc(trans, root->fs_info->extent_root,
+num_bytes,
+btrfs_get_alloc_profile(root, 0),
+CHUNK_ALLOC_NO_FORCE);
+   btrfs_end_transaction(trans, root);
+   if (ret == -ENOSPC)
+   ret = 0;
+   break;
case COMMIT_TRANS:
ret = may_commit_transaction(root, space_info, orig_bytes, 0);
break;
@@ -5515,8 +5528,6 @@ static noinline int find_free_extent(struct 
btrfs_trans_handle *trans,
   

[PATCH] btrfs: return EPERM upon rmdir on a subvolume

2012-09-13 Thread David Sterba
A subvolume cannot be deleted via rmdir, but the error code ENOTEMPTY
is confusing. Return EPERM instead, as this is not permitted.

Signed-off-by: David Sterba 
---
 fs/btrfs/inode.c |5 +++--
 1 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index a4167ef..a67dadd 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -3815,9 +3815,10 @@ static int btrfs_rmdir(struct inode *dir, struct dentry 
*dentry)
struct btrfs_trans_handle *trans;
unsigned long nr = 0;
 
-   if (inode->i_size > BTRFS_EMPTY_DIR_SIZE ||
-   btrfs_ino(inode) == BTRFS_FIRST_FREE_OBJECTID)
+   if (inode->i_size > BTRFS_EMPTY_DIR_SIZE)
return -ENOTEMPTY;
+   if (btrfs_ino(inode) == BTRFS_FIRST_FREE_OBJECTID)
+   return -EPERM;
 
trans = __unlink_start_trans(dir, dentry);
if (IS_ERR(trans))
-- 
1.7.9

--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: ssd alignment and btrfs sector size

2012-09-13 Thread ching
On 09/13/2012 09:00 PM, Josef Bacik wrote:
> On Thu, Sep 13, 2012 at 12:02:04AM -0600, ching lu wrote:
>> I am trying to test btrfs on my ssd, i am studying about btrfs and alignment.
>>
>> I have read this old mail:
>> http://www.mail-archive.com/linux-btrfs@vger.kernel.org/msg16177.html
>>
>> According to the thread, 3 parameters should be tuned
>>
>> 1. leafsize
>> 2. nodesize
>> 3. sectorsize
>>
>> The first two parameters are easy to understand, but how does
>> "sectorsize" affect the alignment?
>>
>> If the erase block size of my SSD is 16k, and i set
>> leafsize=nodesize=16k, leave the sectorsize=4k (default), will it
>> causes misalignment?
> So first leafsize/nodesize will always be the same, so if you set one to 16k
> mkfs will automatically set the other to 16k, it's just way back when we had
> grand plans of having different sizes for both.
>
> Secondly your erase block size is 16k?  What kind of drive are you using?  But
> yeah 4k sectorsize will result in misaligned writes to the data area, but your
> metadata will be aligned.  Hopefully the raid5/6 code will be out soon and 
> we'll
> be able to do > page size sectorsize and you'll be able to set
> leafsize==sectorsize.  Btw most normal SSDs have way larger erase blocks, 
> on
> the order of several MB, so it's not the worst thing to write to the middle of
> erase blocks, everybody does it anyway.  Thanks,
>
> Josef
>

16k is just an assumption, thanks for your help anyway.
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH] Btrfs: using for_each_set_bit_from to simplify the code

2012-09-13 Thread Wei Yongjun
From: Wei Yongjun 

Using for_each_set_bit_from() to simplify the code.

spatch with a semantic match was used to find this.
(http://coccinelle.lip6.fr/)

Signed-off-by: Wei Yongjun 
---
 fs/btrfs/free-space-cache.c | 8 ++--
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
index 6b10acf..b107e68 100644
--- a/fs/btrfs/free-space-cache.c
+++ b/fs/btrfs/free-space-cache.c
@@ -1454,9 +1454,7 @@ static int search_bitmap(struct btrfs_free_space_ctl *ctl,
  max_t(u64, *offset, bitmap_info->offset));
bits = bytes_to_bits(*bytes, ctl->unit);
 
-   for (i = find_next_bit(bitmap_info->bitmap, BITS_PER_BITMAP, i);
-i < BITS_PER_BITMAP;
-i = find_next_bit(bitmap_info->bitmap, BITS_PER_BITMAP, i + 1)) {
+   for_each_set_bit_from(i, bitmap_info->bitmap, BITS_PER_BITMAP) {
next_zero = find_next_zero_bit(bitmap_info->bitmap,
   BITS_PER_BITMAP, i);
if ((next_zero - i) >= bits) {
@@ -2307,9 +2305,7 @@ static int btrfs_bitmap_cluster(struct 
btrfs_block_group_cache *block_group,
 
 again:
found_bits = 0;
-   for (i = find_next_bit(entry->bitmap, BITS_PER_BITMAP, i);
-i < BITS_PER_BITMAP;
-i = find_next_bit(entry->bitmap, BITS_PER_BITMAP, i + 1)) {
+   for_each_set_bit_from(i, entry->bitmap, BITS_PER_BITMAP) {
next_zero = find_next_zero_bit(entry->bitmap,
   BITS_PER_BITMAP, i);
if (next_zero - i >= min_bits) {

--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH] Btrfs + Btrfs-progs: make pipe functions re-usable

2012-09-13 Thread anand jain


 btrfs send introduced a part of code to read kernel-data
 from user-end using pipe. We need this part of code to be
 useable outside of send sub-cmd, so that developing
 service sub-cmd can use it.
 Following this email are the patches for this purpose.

Thanks, Anand

--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 1/1] Btrfs-progs: Update btrfs man page to indicate label for a mounted fs can be changed

2012-09-13 Thread Anand jain
From: Anand Jain 

Signed-off-by: Anand Jain 
---
 man/btrfs.8.in |   14 ++
 1 files changed, 6 insertions(+), 8 deletions(-)

diff --git a/man/btrfs.8.in b/man/btrfs.8.in
index 4b0a9f9..0258d87 100644
--- a/man/btrfs.8.in
+++ b/man/btrfs.8.in
@@ -25,7 +25,7 @@ btrfs \- control a btrfs filesystem
 .PP
 \fBbtrfs\fP \fBfilesystem resize\fP\fI [devid:][+/\-][gkm]|[devid:]max 
\fP
 .PP
-\fBbtrfs\fP \fBfilesystem label\fP\fI  [newlabel]\fP
+\fBbtrfs\fP \fBfilesystem label\fP\fI | [newlabel]\fP
 .PP
 \fBbtrfs\fP \fBsubvolume find-new\fP\fI  \fP
 .PP
@@ -197,8 +197,8 @@ it with the new desired size.  When recreating the 
partition make sure to use
 the same starting disk cylinder as before.
 .TP
 
-\fBfilesystem label\fP\fI  [newlabel]\fP
-Show or update the label of a filesystem. \fI\fR is used to identify the
+\fBfilesystem label\fP\fI | [newlabel]\fP
+Show or update the label of a filesystem. \fI|\fR is used to 
identify the
 filesystem. 
 If a \fInewlabel\fR optional argument is passed, the label is changed. The
 following constraints exist for a label:
@@ -207,11 +207,9 @@ following constraints exist for a label:
 .IP
 - the label shall not  contain the '/' or '\\' characters.
 
-NOTE: Currently there are the following limitations:
-.IP
-- the filesystem has to be unmounted
-.IP
-- the filesystem should not have more than one device.
+Note: 
+As of now a btrfs filesystem containing more than one device must be
+mounted to change its filesystem label.
 .TP
 
 \fBfilesystem show\fR [--all-devices||]\fR
-- 
1.7.1

--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH] Btrfs: write_buf is now callable outside send.c

2012-09-13 Thread Anand jain
From: Anand Jain 

Developing service cmds needs it.

Signed-off-by: Anand Jain 
---
 fs/btrfs/send.c |   11 ++-
 fs/btrfs/send.h |1 +
 2 files changed, 7 insertions(+), 5 deletions(-)

diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c
index fb5ffe9..89411b3 100644
--- a/fs/btrfs/send.c
+++ b/fs/btrfs/send.c
@@ -377,7 +377,7 @@ static struct btrfs_path *alloc_path_for_send(void)
return path;
 }
 
-static int write_buf(struct send_ctx *sctx, const void *buf, u32 len)
+int write_buf(struct file *filp, const void *buf, u32 len, loff_t *off)
 {
int ret;
mm_segment_t old_fs;
@@ -387,8 +387,7 @@ static int write_buf(struct send_ctx *sctx, const void 
*buf, u32 len)
set_fs(KERNEL_DS);
 
while (pos < len) {
-   ret = vfs_write(sctx->send_filp, (char *)buf + pos, len - pos,
-   &sctx->send_off);
+   ret = vfs_write(filp, (char *)buf + pos, len - pos, off);
/* TODO handle that correctly */
/*if (ret == -ERESTARTSYS) {
continue;
@@ -544,7 +543,8 @@ static int send_header(struct send_ctx *sctx)
strcpy(hdr.magic, BTRFS_SEND_STREAM_MAGIC);
hdr.version = cpu_to_le32(BTRFS_SEND_STREAM_VERSION);
 
-   return write_buf(sctx, &hdr, sizeof(hdr));
+   return write_buf(sctx->send_filp, &hdr, sizeof(hdr),
+   &sctx->send_off);
 }
 
 /*
@@ -581,7 +581,8 @@ static int send_cmd(struct send_ctx *sctx)
crc = crc32c(0, (unsigned char *)sctx->send_buf, sctx->send_size);
hdr->crc = cpu_to_le32(crc);
 
-   ret = write_buf(sctx, sctx->send_buf, sctx->send_size);
+   ret = write_buf(sctx->send_filp, sctx->send_buf, sctx->send_size,
+   &sctx->send_off);
 
sctx->total_send_size += sctx->send_size;
sctx->cmd_send_size[le16_to_cpu(hdr->cmd)] += sctx->send_size;
diff --git a/fs/btrfs/send.h b/fs/btrfs/send.h
index 9934e94..1bf4f32 100644
--- a/fs/btrfs/send.h
+++ b/fs/btrfs/send.h
@@ -130,4 +130,5 @@ enum {
 
 #ifdef __KERNEL__
 long btrfs_ioctl_send(struct file *mnt_file, void __user *arg);
+int write_buf(struct file *filp, const void *buf, u32 len, loff_t *off);
 #endif
-- 
1.7.1

--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH] Btrfs-progs: make dump_thread and write_buf usable outside cmds-send.c

2012-09-13 Thread Anand jain
From: Anand Jain 

Developing service cmds needs it.

Signed-off-by: Anand Jain 
---
 cmds-send.c |   15 +--
 commands.h  |8 
 2 files changed, 17 insertions(+), 6 deletions(-)

diff --git a/cmds-send.c b/cmds-send.c
index 41ea523..9e94410 100644
--- a/cmds-send.c
+++ b/cmds-send.c
@@ -175,7 +175,7 @@ static void add_clone_source(struct btrfs_send *s, u64 
root_id)
s->clone_sources[s->clone_sources_count++] = root_id;
 }
 
-static int write_buf(int fd, const void *buf, int size)
+int write_buf(int fd, const void *buf, int size)
 {
int ret;
int pos = 0;
@@ -202,15 +202,15 @@ out:
return ret;
 }
 
-static void *dump_thread(void *arg_)
+void *dump_thread(void *arg_)
 {
int ret;
-   struct btrfs_send *s = (struct btrfs_send*)arg_;
+   struct btrfs_dump *d = (struct btrfs_dump*)arg_;
char buf[4096];
int readed;
 
while (1) {
-   readed = read(s->send_fd, buf, sizeof(buf));
+   readed = read(d->from_fd, buf, sizeof(buf));
if (readed < 0) {
ret = -errno;
fprintf(stderr, "ERROR: failed to read stream from "
@@ -221,7 +221,7 @@ static void *dump_thread(void *arg_)
ret = 0;
goto out;
}
-   ret = write_buf(s->dump_fd, buf, readed);
+   ret = write_buf(d->to_fd, buf, readed);
if (ret < 0)
goto out;
}
@@ -241,6 +241,7 @@ static int do_send(struct btrfs_send *send, u64 root_id, 
u64 parent_root)
pthread_attr_t t_attr;
struct btrfs_ioctl_send_args io_send;
struct subvol_info *si;
+   struct btrfs_dump dump;
void *t_err = NULL;
int subvol_fd = -1;
int pipefd[2];
@@ -273,10 +274,12 @@ static int do_send(struct btrfs_send *send, u64 root_id, 
u64 parent_root)
 
io_send.send_fd = pipefd[1];
send->send_fd = pipefd[0];
+   dump.from_fd = pipefd[0];
+   dump.to_fd = send->dump_fd;
 
if (!ret)
ret = pthread_create(&t_read, &t_attr, dump_thread,
-   send);
+   &dump);
if (ret) {
ret = -ret;
fprintf(stderr, "ERROR: thread setup failed: %s\n",
diff --git a/commands.h b/commands.h
index bb6d2dd..c65ba20 100644
--- a/commands.h
+++ b/commands.h
@@ -82,6 +82,14 @@ void help_command_group(const struct cmd_group *grp, int 
argc, char **argv);
 /* common.c */
 int open_file_or_dir(const char *fname);
 
+/* cmds-send.c */
+int write_buf(int fd, const void *buf, int size);
+void * dump_thread(void *arg_);
+struct btrfs_dump {
+int from_fd;
+int to_fd;
+};
+
 extern const struct cmd_group subvolume_cmd_group;
 extern const struct cmd_group filesystem_cmd_group;
 extern const struct cmd_group balance_cmd_group;
-- 
1.7.1

--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH] Btrfs-progs: making send.h inline with its kernel side change

2012-09-13 Thread Anand jain
From: Anand Jain 

Signed-off-by: Anand Jain 
---
 send.h |1 +
 1 files changed, 1 insertions(+), 0 deletions(-)

diff --git a/send.h b/send.h
index 9934e94..1bf4f32 100644
--- a/send.h
+++ b/send.h
@@ -130,4 +130,5 @@ enum {
 
 #ifdef __KERNEL__
 long btrfs_ioctl_send(struct file *mnt_file, void __user *arg);
+int write_buf(struct file *filp, const void *buf, u32 len, loff_t *off);
 #endif
-- 
1.7.1

--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html