[PATCH] Btrfs: fix use-after-free when cloning a trailing file hole

2014-06-29 Thread Filipe David Borba Manana
The transaction handle was being used after being freed.

Cc: Chris Mason c...@fb.com
Signed-off-by: Filipe David Borba Manana fdman...@gmail.com
---
 fs/btrfs/ioctl.c | 6 ++
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 02dc64b..2562dc7 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -3565,12 +3565,10 @@ process_slot:
btrfs_end_transaction(trans, root);
goto out;
}
-   ret = clone_finish_inode_update(trans, inode, destoff + len,
-   destoff, olen);
-   if (ret)
-   goto out;
clone_update_extent_map(inode, trans, path, NULL, last_dest_end,
destoff + len - last_dest_end);
+   ret = clone_finish_inode_update(trans, inode, destoff + len,
+   destoff, olen);
}
 
 out:
-- 
1.9.1

--
To unsubscribe from this list: send the line unsubscribe linux-btrfs in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v2] Btrfs: fix use-after-free when cloning a trailing file hole

2014-06-29 Thread Filipe David Borba Manana
The transaction handle was being used after being freed.

Cc: Chris Mason c...@fb.com
Signed-off-by: Filipe David Borba Manana fdman...@gmail.com
---

V2: Removed file extent item argument to clone_update_extent_map() for
more clarity.

 fs/btrfs/ioctl.c | 20 +---
 1 file changed, 9 insertions(+), 11 deletions(-)

diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 02dc64b..2a99f49 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -3142,7 +3142,6 @@ out:
 static void clone_update_extent_map(struct inode *inode,
const struct btrfs_trans_handle *trans,
const struct btrfs_path *path,
-   struct btrfs_file_extent_item *fi,
const u64 hole_offset,
const u64 hole_len)
 {
@@ -3157,7 +3156,11 @@ static void clone_update_extent_map(struct inode *inode,
return;
}
 
-   if (fi) {
+   if (path) {
+   struct btrfs_file_extent_item *fi;
+
+   fi = btrfs_item_ptr(path-nodes[0], path-slots[0],
+   struct btrfs_file_extent_item);
btrfs_extent_item_to_extent_map(inode, path, fi, false, em);
em-generation = -1;
if (btrfs_file_extent_type(path-nodes[0], fi) ==
@@ -3511,18 +3514,15 @@ process_slot:
btrfs_item_ptr_offset(leaf, slot),
size);
inode_add_bytes(inode, datal);
-   extent = btrfs_item_ptr(leaf, slot,
-   struct btrfs_file_extent_item);
}
 
/* If we have an implicit hole (NO_HOLES feature). */
if (drop_start  new_key.offset)
clone_update_extent_map(inode, trans,
-   path, NULL, drop_start,
+   NULL, drop_start,
new_key.offset - drop_start);
 
-   clone_update_extent_map(inode, trans, path,
-   extent, 0, 0);
+   clone_update_extent_map(inode, trans, path, 0, 0);
 
btrfs_mark_buffer_dirty(leaf);
btrfs_release_path(path);
@@ -3565,12 +3565,10 @@ process_slot:
btrfs_end_transaction(trans, root);
goto out;
}
+   clone_update_extent_map(inode, trans, NULL, last_dest_end,
+   destoff + len - last_dest_end);
ret = clone_finish_inode_update(trans, inode, destoff + len,
destoff, olen);
-   if (ret)
-   goto out;
-   clone_update_extent_map(inode, trans, path, NULL, last_dest_end,
-   destoff + len - last_dest_end);
}
 
 out:
-- 
1.9.1

--
To unsubscribe from this list: send the line unsubscribe linux-btrfs in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH] Btrfs: atomically set inode->i_flags in btrfs_update_iflags

2014-06-25 Thread Filipe David Borba Manana
This change is based on the corresponding recent change for ext4:

  ext4: atomically set inode->i_flags in ext4_set_inode_flags()

That has the following commit message that applies to btrfs as well:

  Use cmpxchg() to atomically set i_flags instead of clearing out the
   S_IMMUTABLE, S_APPEND, etc. flags and then setting them from the
   EXT4_IMMUTABLE_FL, EXT4_APPEND_FL flags, since this opens up a race
   where an immutable file has the immutable flag cleared for a brief
   window of time.

For btrfs, EXT4_IMMUTABLE_FL and EXT4_APPEND_FL are replaced with
BTRFS_INODE_IMMUTABLE and BTRFS_INODE_APPEND, respectively.

Signed-off-by: Filipe David Borba Manana fdman...@gmail.com
---
 fs/btrfs/ioctl.c | 17 ++---
 1 file changed, 10 insertions(+), 7 deletions(-)

diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 6ea1546..02dc64b 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -136,19 +136,22 @@ static unsigned int btrfs_flags_to_ioctl(unsigned int 
flags)
 void btrfs_update_iflags(struct inode *inode)
 {
struct btrfs_inode *ip = BTRFS_I(inode);
-
-   inode-i_flags = ~(S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC);
+   unsigned int new_fl = 0;
 
if (ip-flags  BTRFS_INODE_SYNC)
-   inode-i_flags |= S_SYNC;
+   new_fl |= S_SYNC;
if (ip-flags  BTRFS_INODE_IMMUTABLE)
-   inode-i_flags |= S_IMMUTABLE;
+   new_fl |= S_IMMUTABLE;
if (ip-flags  BTRFS_INODE_APPEND)
-   inode-i_flags |= S_APPEND;
+   new_fl |= S_APPEND;
if (ip-flags  BTRFS_INODE_NOATIME)
-   inode-i_flags |= S_NOATIME;
+   new_fl |= S_NOATIME;
if (ip-flags  BTRFS_INODE_DIRSYNC)
-   inode-i_flags |= S_DIRSYNC;
+   new_fl |= S_DIRSYNC;
+
+   set_mask_bits(inode-i_flags,
+ S_SYNC | S_APPEND | S_IMMUTABLE | S_NOATIME | S_DIRSYNC,
+ new_fl);
 }
 
 /*
-- 
1.9.1

--
To unsubscribe from this list: send the line unsubscribe linux-btrfs in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v2] Btrfs: fix crash when starting transaction

2014-06-24 Thread Filipe David Borba Manana
Often when starting a transaction we commit the currently running transaction,
which can end up writing block group caches when the current process has its
journal_info set to NULL (and not to a transaction). This makes our assertion
at btrfs_check_data_free_space() (current->journal_info != NULL) fail, resulting
in a crash/hang. Therefore fix it by setting journal_info.

Two different traces of this issue follow below.

1)

[51502.241936] BTRFS: assertion failed: current-journal_info, file: 
fs/btrfs/extent-tree.c, line: 3670
[51502.242213] [ cut here ]
[51502.242493] kernel BUG at fs/btrfs/ctree.h:3964!
[51502.242669] invalid opcode:  [#1] SMP DEBUG_PAGEALLOC
(...)
[51502.244010] Call Trace:
[51502.244010]  [a02bc025] 
btrfs_check_data_free_space+0x395/0x3a0 [btrfs]
[51502.244010]  [a02c3bdc] 
btrfs_write_dirty_block_groups+0x4ac/0x640 [btrfs]
[51502.244010]  [a0357a6a] commit_cowonly_roots+0x164/0x226 
[btrfs]
[51502.244010]  [a02d53cd] btrfs_commit_transaction+0x4ed/0xab0 
[btrfs]
[51502.244010]  [8168ec7b] ? _raw_spin_unlock+0x2b/0x40
[51502.244010]  [a02d6259] start_transaction+0x459/0x620 [btrfs]
[51502.244010]  [a02d67ab] btrfs_start_transaction+0x1b/0x20 
[btrfs]
[51502.244010]  [a02d73e1] __unlink_start_trans+0x31/0xe0 [btrfs]
[51502.244010]  [a02dea67] btrfs_unlink+0x37/0xc0 [btrfs]
[51502.244010]  [811bb054] ? do_unlinkat+0x114/0x2a0
[51502.244010]  [811baebc] vfs_unlink+0xcc/0x150
[51502.244010]  [811bb1a0] do_unlinkat+0x260/0x2a0
[51502.244010]  [811a9ef4] ? filp_close+0x64/0x90
[51502.244010]  [810aaea6] ? trace_hardirqs_on_caller+0x16/0x1e0
[51502.244010]  [81349cab] ? trace_hardirqs_on_thunk+0x3a/0x3f
[51502.244010]  [811be9eb] SyS_unlinkat+0x1b/0x40
[51502.244010]  [81698452] system_call_fastpath+0x16/0x1b
[51502.244010] Code: 0b 55 48 89 e5 0f 0b 55 48 89 e5 0f 0b 55 89 f1 48 c7 
c2 71 13 36 a0 48 89 fe 31 c0 48 c7 c7 b8 43 36 a0 48 89 e5 e8 5d b0 32 e1 0f 
0b 0f 1f 44 00 00 55 b9 11 00 00 00 48 89 e5 41 55 49 89 f5
[51502.244010] RIP  [a03575da] assfail.constprop.88+0x1e/0x20 
[btrfs]

2)

[25405.097230] BTRFS: assertion failed: current-journal_info, file: 
fs/btrfs/extent-tree.c, line: 3670
[25405.097488] [ cut here ]
[25405.097767] kernel BUG at fs/btrfs/ctree.h:3964!
[25405.097940] invalid opcode:  [#1] SMP DEBUG_PAGEALLOC
(...)
[25405.18] Call Trace:
[25405.18]  [a02bc025] 
btrfs_check_data_free_space+0x395/0x3a0 [btrfs]
[25405.18]  [a02c3bdc] 
btrfs_write_dirty_block_groups+0x4ac/0x640 [btrfs]
[25405.18]  [a035755a] commit_cowonly_roots+0x164/0x226 
[btrfs]
[25405.18]  [a02d53cd] btrfs_commit_transaction+0x4ed/0xab0 
[btrfs]
[25405.18]  [8109c170] ? bit_waitqueue+0xc0/0xc0
[25405.18]  [a02d6259] start_transaction+0x459/0x620 [btrfs]
[25405.18]  [a02d67ab] btrfs_start_transaction+0x1b/0x20 
[btrfs]
[25405.18]  [a02e3407] btrfs_create+0x47/0x210 [btrfs]
[25405.18]  [a02d74cc] ? btrfs_permission+0x3c/0x80 [btrfs]
[25405.18]  [811bc63b] vfs_create+0x9b/0x130
[25405.18]  [811bcf19] do_last+0x849/0xe20
[25405.18]  [811b9409] ? link_path_walk+0x79/0x820
[25405.18]  [811bd5b5] path_openat+0xc5/0x690
[25405.18]  [810ab07d] ? trace_hardirqs_on+0xd/0x10
[25405.18]  [811cdcd2] ? __alloc_fd+0x32/0x1d0
[25405.18]  [811be2a3] do_filp_open+0x43/0xa0
[25405.18]  [811cddf1] ? __alloc_fd+0x151/0x1d0
[25405.18]  [811abcfc] do_sys_open+0x13c/0x230
[25405.18]  [810aaea6] ? trace_hardirqs_on_caller+0x16/0x1e0
[25405.18]  [811abe12] SyS_open+0x22/0x30
[25405.18]  [81698452] system_call_fastpath+0x16/0x1b
[25405.18] Code: 0b 55 48 89 e5 0f 0b 55 48 89 e5 0f 0b 55 89 f1 48 c7 
c2 51 13 36 a0 48 89 fe 31 c0 48 c7 c7 d0 43 36 a0 48 89 e5 e8 6d b5 32 e1 0f 
0b 0f 1f 44 00 00 55 b9 11 00 00 00 48 89 e5 41 55 49 89 f5
[25405.18] RIP  [a03570ca] assfail.constprop.88+0x1e/0x20 
[btrfs]

Signed-off-by: Filipe David Borba Manana fdman...@gmail.com
---

V2: Removed test for current->journal_info == NULL. At this point it's
always expected to be NULL.

 fs/btrfs/transaction.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index ac984a3..614eac3 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -491,6 +491,7 @@ again:
smp_mb();
if (cur_trans-state = TRANS_STATE_BLOCKED 
may_wait_transaction(root, type)) {
+   current-journal_info = h

[PATCH] Btrfs: assert send doesn't attempt to start transactions

2014-06-24 Thread Filipe David Borba Manana
When starting a transaction, just assert that current->journal_info
doesn't contain a send transaction stub, since send isn't supposed
to start transactions and when it finishes (either successfully or
not) it's supposed to set current->journal_info to NULL.

This is motivated by the change titled:

Btrfs: fix crash when starting transaction

Signed-off-by: Filipe David Borba Manana fdman...@gmail.com
---
 fs/btrfs/transaction.c | 6 --
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index 614eac3..47870ca 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -386,11 +386,13 @@ start_transaction(struct btrfs_root *root, u64 num_items, 
unsigned int type,
bool reloc_reserved = false;
int ret;
 
+   /* Send isn't supposed to start transactions. */
+   ASSERT(current-journal_info != (void *)BTRFS_SEND_TRANS_STUB);
+
if (test_bit(BTRFS_FS_STATE_ERROR, root-fs_info-fs_state))
return ERR_PTR(-EROFS);
 
-   if (current-journal_info 
-   current-journal_info != (void *)BTRFS_SEND_TRANS_STUB) {
+   if (current-journal_info) {
WARN_ON(type  TRANS_EXTWRITERS);
h = current-journal_info;
h-use_count++;
-- 
1.9.1

--
To unsubscribe from this list: send the line unsubscribe linux-btrfs in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH] Btrfs: implement support for fallocate collapse range

2014-06-23 Thread Filipe David Borba Manana
This implements fallocate's FALLOC_FL_COLLAPSE_RANGE operation for
BTRFS. This fallocate operation was introduced in Linux kernel
version 3.15.

Existing tests in xfstests already test this operation explicitly
and implicitly via fsstress.

Signed-off-by: Filipe David Borba Manana fdman...@gmail.com
---
 fs/btrfs/ctree.c   |  42 -
 fs/btrfs/ctree.h   |   2 +
 fs/btrfs/extent-tree.c |  30 +--
 fs/btrfs/file.c| 486 +
 4 files changed, 453 insertions(+), 107 deletions(-)

diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index aeab453..8f1a371 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -2825,12 +2825,12 @@ cow_done:
 * It is safe to drop the lock on our parent before we
 * go through the expensive btree search on b.
 *
-* If we're inserting or deleting (ins_len != 0), then we might
-* be changing slot zero, which may require changing the parent.
-* So, we can't drop the lock until after we know which slot
-* we're operating on.
+* If we're inserting, deleting or updating a key (cow != 0),
+* then we might be changing slot zero, which may require
+* changing the parent. So, we can't drop the lock until after
+* we know which slot we're operating on.
 */
-   if (!ins_len  !p-keep_locks) {
+   if (!cow  !p-keep_locks) {
int u = level + 1;
 
if (u  BTRFS_MAX_LEVEL  p-locks[u]) {
@@ -2865,7 +2865,7 @@ cow_done:
 * which means we must have a write lock
 * on the parent
 */
-   if (slot == 0  ins_len 
+   if (slot == 0  cow 
write_lock_level  level + 1) {
write_lock_level = level + 1;
btrfs_release_path(p);
@@ -5660,6 +5660,36 @@ next:
 }
 
 /*
+ * This differs from btrfs_find_next_key in that it ignores leaf/node
+ * generations and it doesn't unlock and re-lock nodes/leaves nor does
+ * any subsequent searches (calls to btrfs_search_slot), preserving the
+ * locks in the given path.
+ *
+ * Returns 0 if a next key exists, 1 otherwise.
+ */
+int btrfs_find_next_current_key(struct btrfs_path *path, int level,
+   struct btrfs_key *key)
+
+{
+   for (; level  BTRFS_MAX_LEVEL; level++) {
+   if (!path-nodes[level])
+   break;
+   if (path-slots[level] + 1 =
+   btrfs_header_nritems(path-nodes[level]))
+   continue;
+   if (level == 0)
+   btrfs_item_key_to_cpu(path-nodes[level], key,
+ path-slots[level] + 1);
+   else
+   btrfs_node_key_to_cpu(path-nodes[level], key,
+ path-slots[level] + 1);
+   return 0;
+   }
+   return 1;
+}
+
+
+/*
  * search the tree again to find a leaf with greater keys
  * returns 0 if it found something or 1 if there are no greater leaves.
  * returns  0 on io errors.
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index b7e2c1c..166a35f 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -3446,6 +3446,8 @@ struct extent_buffer *btrfs_lock_root_node(struct 
btrfs_root *root);
 int btrfs_find_next_key(struct btrfs_root *root, struct btrfs_path *path,
struct btrfs_key *key, int lowest_level,
u64 min_trans);
+int btrfs_find_next_current_key(struct btrfs_path *path, int level,
+   struct btrfs_key *key);
 int btrfs_search_forward(struct btrfs_root *root, struct btrfs_key *min_key,
 struct btrfs_path *path,
 u64 min_trans);
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index fafb3e5..a6d0ec7 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -100,8 +100,6 @@ static int alloc_reserved_tree_block(struct 
btrfs_trans_handle *trans,
 static int do_chunk_alloc(struct btrfs_trans_handle *trans,
  struct btrfs_root *extent_root, u64 flags,
  int force);
-static int find_next_key(struct btrfs_path *path, int level,
-struct btrfs_key *key);
 static void dump_space_info(struct btrfs_space_info *info, u64 bytes,
int dump_block_groups);
 static int btrfs_update_reserved_bytes(struct btrfs_block_group_cache *cache,
@@ -440,7 +438,7 @@ next:
if (path-slots[0]  nritems) {
btrfs_item_key_to_cpu(leaf, key, path-slots[0]);
} else {
-   ret = find_next_key(path, 0

[PATCH] generic/017: skip invalid block sizes for btrfs

2014-06-23 Thread Filipe David Borba Manana
In btrfs the block size (called sector size in btrfs) cannot be
smaller than the page size. Therefore skip block sizes smaller
than the page size if the fs is btrfs, so that the test can succeed
on btrfs (testing only with a block size of 4Kb on systems with a
page size of 4Kb).

Signed-off-by: Filipe David Borba Manana fdman...@gmail.com
---
 tests/generic/017 | 8 
 1 file changed, 8 insertions(+)

diff --git a/tests/generic/017 b/tests/generic/017
index 13b7254..6495be5 100755
--- a/tests/generic/017
+++ b/tests/generic/017
@@ -51,6 +51,14 @@ BLOCKS=10240
 
 for (( BSIZE = 1024; BSIZE = 4096; BSIZE *= 2 )); do
 
+   # btrfs doesn't support block size smaller then page size
+   if [ $FSTYP == btrfs ]; then
+   if (( $BSIZE  `getconf PAGE_SIZE` )); then
+   echo 80
+   continue
+   fi
+   fi
+
length=$(($BLOCKS * $BSIZE))
case $FSTYP in
xfs)
-- 
1.9.1

--
To unsubscribe from this list: send the line unsubscribe linux-btrfs in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 2/6 v5] Btrfs: send, implement total data size command to allow for progress estimation

2014-06-23 Thread Filipe David Borba Manana
This new send flag makes send first calculate the amount of new file data (in
bytes) the send root has relative to the parent root or, in the case of a
non-incremental send, the total amount of file data the stream will create
(including holes and prealloc extents). In other words, it computes the sum of
the lengths of all write, clone and fallocate operations that will be sent
through the send stream.

This data size value is sent in a new command, named 
BTRFS_SEND_C_TOTAL_DATA_SIZE, that
immediately follows a BTRFS_SEND_C_SUBVOL or BTRFS_SEND_C_SNAPSHOT command, and 
precedes
any command that changes a file or the filesystem hierarchy. Upon receiving a 
write, clone
or fallocate command, the receiving end can increment a counter by the data 
length of that
command and therefore report progress by comparing the counter's value with the 
data size
value received in the BTRFS_SEND_C_TOTAL_DATA_SIZE command.

The approach is simple: before the normal operation of send, do a scan of the
file system tree for new inodes and new/changed file extent items, just like in
send's normal operation, and keep incrementing a counter with the new inodes'
sizes and the sizes of the file extents (and file holes) that are going to be
written, cloned or fallocated. This is actually a simpler and more lightweight
tree scan/processing than the one we do when sending the changes, as it doesn't
process inode references nor does it do any lookups in the extent tree, for
example.

After modifying btrfs-progs to understand this new command and report progress, 
here's an
example (the -o flag tells btrfs send to pass the new flag to the kernel's send 
ioctl):

$ btrfs send -s --stream-version 2 /mnt/sdd/snap_base | btrfs receive 
/mnt/sdc
At subvol /mnt/sdd/snap_base
At subvol snap_base
About to receive 9212392667 bytes
Subvolume /mnt/sdc//snap_base, 4059722426 / 9212392667 bytes received, 
44.07%, 40.32MB/s

$ btrfs send -s --stream-version 2 -p /mnt/sdd/snap_base /mnt/sdd/snap_incr 
| btrfs receive /mnt/sdc
At subvol /mnt/sdd/snap_incr
At subvol snap_incr
About to receive 9571342213 bytes
Subvolume /mnt/sdc//snap_incr, 6557345221 / 9571342213 bytes received, 
68.51%, 51.04MB/s

Signed-off-by: Filipe David Borba Manana fdman...@gmail.com
---

V2: A v2 stream is now only produced if the send ioctl caller passes in one of
the new flags (BTRFS_SEND_FLAG_CALCULATE_DATA_SIZE | 
BTRFS_SEND_FLAG_SUPPORT_FALLOCATE)
to avoid breaking old clients.
V3: Removed BTRFS_SEND_FLAG_SUPPORT_FALLOCATE and added 
BTRFS_SEND_FLAG_STREAM_V2,
added commands for inode set flags and otime.
V4: There's no v4; the version was bumped directly to v5 so that all patches in
the series have the same version.
V5: Rebased against latest chris/integration branch.

 fs/btrfs/send.c | 194 ++--
 1 file changed, 162 insertions(+), 32 deletions(-)

diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c
index d7ef14b..dd6f5ec 100644
--- a/fs/btrfs/send.c
+++ b/fs/btrfs/send.c
@@ -81,7 +81,13 @@ struct clone_root {
 #define SEND_CTX_MAX_NAME_CACHE_SIZE 128
 #define SEND_CTX_NAME_CACHE_CLEAN_SIZE (SEND_CTX_MAX_NAME_CACHE_SIZE * 2)
 
+enum btrfs_send_phase {
+   SEND_PHASE_STREAM_CHANGES,
+   SEND_PHASE_COMPUTE_DATA_SIZE,
+};
+
 struct send_ctx {
+   enum btrfs_send_phase phase;
struct file *send_filp;
loff_t send_off;
char *send_buf;
@@ -116,6 +122,7 @@ struct send_ctx {
u64 cur_inode_last_extent;
 
u64 send_progress;
+   u64 total_data_size;
 
struct list_head new_refs;
struct list_head deleted_refs;
@@ -696,6 +703,8 @@ static int send_rename(struct send_ctx *sctx,
 {
int ret;
 
+   ASSERT(sctx-phase != SEND_PHASE_COMPUTE_DATA_SIZE);
+
 verbose_printk(btrfs: send_rename %s - %s\n, from-start, to-start);
 
ret = begin_cmd(sctx, BTRFS_SEND_C_RENAME);
@@ -720,6 +729,8 @@ static int send_link(struct send_ctx *sctx,
 {
int ret;
 
+   ASSERT(sctx-phase != SEND_PHASE_COMPUTE_DATA_SIZE);
+
 verbose_printk(btrfs: send_link %s - %s\n, path-start, lnk-start);
 
ret = begin_cmd(sctx, BTRFS_SEND_C_LINK);
@@ -743,6 +754,8 @@ static int send_unlink(struct send_ctx *sctx, struct 
fs_path *path)
 {
int ret;
 
+   ASSERT(sctx-phase != SEND_PHASE_COMPUTE_DATA_SIZE);
+
 verbose_printk(btrfs: send_unlink %s\n, path-start);
 
ret = begin_cmd(sctx, BTRFS_SEND_C_UNLINK);
@@ -765,6 +778,8 @@ static int send_rmdir(struct send_ctx *sctx, struct fs_path 
*path)
 {
int ret;
 
+   ASSERT(sctx-phase != SEND_PHASE_COMPUTE_DATA_SIZE);
+
 verbose_printk(btrfs: send_rmdir %s\n, path-start);
 
ret = begin_cmd(sctx, BTRFS_SEND_C_RMDIR);
@@ -2325,6 +2340,9 @@ static int send_truncate(struct send_ctx *sctx, u64 ino, 
u64 gen, u64 size)
int ret = 0;
struct fs_path *p;
 
+   if (sctx-phase == SEND_PHASE_COMPUTE_DATA_SIZE)
+   return 0

[PATCH 3/6 v5] Btrfs: send, use fallocate command to punch holes

2014-06-23 Thread Filipe David Borba Manana
Instead of sending a write command with a data buffer filled with zero bytes,
use the fallocate command, introduced in the send stream version 2, to tell the
receiver to punch a file hole using the fallocate system call.

Signed-off-by: Filipe David Borba Manana fdman...@gmail.com
---

V2: A v2 stream is now only produced if the send ioctl caller passes in one of
the new flags (BTRFS_SEND_FLAG_CALCULATE_DATA_SIZE | 
BTRFS_SEND_FLAG_SUPPORT_FALLOCATE)
to avoid breaking old clients.
V3: Added missing path allocation, messed up rebase.
V4: Removed BTRFS_SEND_FLAG_SUPPORT_FALLOCATE and added 
BTRFS_SEND_FLAG_STREAM_V2,
added commands for inode set flags and otime.
V5: Rebased against latest chris/integration branch.

 fs/btrfs/send.c | 55 ---
 fs/btrfs/send.h |  4 
 2 files changed, 56 insertions(+), 3 deletions(-)

diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c
index dd6f5ec..300eaee 100644
--- a/fs/btrfs/send.c
+++ b/fs/btrfs/send.c
@@ -569,6 +569,7 @@ static int tlv_put(struct send_ctx *sctx, u16 attr, const 
void *data, int len)
return tlv_put(sctx, attr, __tmp, sizeof(__tmp));  \
}
 
+TLV_PUT_DEFINE_INT(32)
 TLV_PUT_DEFINE_INT(64)
 
 static int tlv_put_string(struct send_ctx *sctx, u16 attr,
@@ -4500,18 +4501,59 @@ out:
return ret;
 }
 
+static int send_fallocate(struct send_ctx *sctx, u32 flags,
+ u64 offset, u64 len)
+{
+   struct fs_path *p = NULL;
+   int ret = 0;
+
+   ASSERT(sctx-flags  BTRFS_SEND_FLAG_STREAM_V2);
+
+   if (sctx-phase == SEND_PHASE_COMPUTE_DATA_SIZE) {
+   sctx-total_data_size += len;
+   return 0;
+   }
+
+   p = fs_path_alloc();
+   if (!p)
+   return -ENOMEM;
+   ret = get_cur_path(sctx, sctx-cur_ino, sctx-cur_inode_gen, p);
+   if (ret  0)
+   goto out;
+
+   ret = begin_cmd(sctx, BTRFS_SEND_C_FALLOCATE);
+   if (ret  0)
+   goto out;
+   TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p);
+   TLV_PUT_U32(sctx, BTRFS_SEND_A_FALLOCATE_FLAGS, flags);
+   TLV_PUT_U64(sctx, BTRFS_SEND_A_FILE_OFFSET, offset);
+   TLV_PUT_U64(sctx, BTRFS_SEND_A_SIZE, len);
+   ret = send_cmd(sctx);
+
+tlv_put_failure:
+out:
+   fs_path_free(p);
+   return ret;
+}
+
 static int send_hole(struct send_ctx *sctx, u64 end)
 {
struct fs_path *p = NULL;
u64 offset = sctx-cur_inode_last_extent;
-   u64 len;
+   u64 len = end - offset;
int ret = 0;
 
if (sctx-phase == SEND_PHASE_COMPUTE_DATA_SIZE) {
-   sctx-total_data_size += end - offset;
+   sctx-total_data_size += len;
return 0;
}
 
+   if (sctx-flags  BTRFS_SEND_FLAG_STREAM_V2)
+   return send_fallocate(sctx,
+ BTRFS_SEND_PUNCH_HOLE_FALLOC_FLAGS,
+ offset,
+ len);
+
p = fs_path_alloc();
if (!p)
return -ENOMEM;
@@ -4568,7 +4610,8 @@ static int send_write_or_clone(struct send_ctx *sctx,
len = btrfs_file_extent_num_bytes(path-nodes[0], ei);
}
 
-   if (offset + len  sctx-cur_inode_size)
+   if (offset  sctx-cur_inode_size 
+   offset + len  sctx-cur_inode_size)
len = sctx-cur_inode_size - offset;
if (len == 0) {
ret = 0;
@@ -4585,6 +4628,12 @@ static int send_write_or_clone(struct send_ctx *sctx,
ret = send_clone(sctx, offset, len, clone_root);
} else if (sctx-flags  BTRFS_SEND_FLAG_NO_FILE_DATA) {
ret = send_update_extent(sctx, offset, len);
+   } else if (btrfs_file_extent_disk_bytenr(path-nodes[0], ei) == 0 
+  type != BTRFS_FILE_EXTENT_INLINE 
+  (sctx-flags  BTRFS_SEND_FLAG_STREAM_V2) 
+  offset  sctx-cur_inode_size) {
+   ret = send_fallocate(sctx, BTRFS_SEND_PUNCH_HOLE_FALLOC_FLAGS,
+offset, len);
} else {
while (pos  len) {
l = len - pos;
diff --git a/fs/btrfs/send.h b/fs/btrfs/send.h
index 96f583c..987936c 100644
--- a/fs/btrfs/send.h
+++ b/fs/btrfs/send.h
@@ -148,6 +148,10 @@ enum {
 #define BTRFS_SEND_A_FALLOCATE_FLAG_KEEP_SIZE   (1  0)
 #define BTRFS_SEND_A_FALLOCATE_FLAG_PUNCH_HOLE  (1  1)
 
+#define BTRFS_SEND_PUNCH_HOLE_FALLOC_FLAGS\
+   (BTRFS_SEND_A_FALLOCATE_FLAG_KEEP_SIZE |  \
+BTRFS_SEND_A_FALLOCATE_FLAG_PUNCH_HOLE)
+
 #ifdef __KERNEL__
 long btrfs_ioctl_send(struct file *mnt_file, void __user *arg);
 #endif
-- 
1.9.1

--
To unsubscribe from this list: send the line unsubscribe linux-btrfs in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 4/6 v5] Btrfs: send, use fallocate command to allocate extents

2014-06-23 Thread Filipe David Borba Manana
The send stream version 2 adds the fallocate command, which can be used to
allocate extents for a file or punch holes in a file. Previously we were
ignoring file prealloc extents or treating them as extents filled with 0
bytes and sending a regular write command to the stream.

After this change, together with my previous change titled:

Btrfs: send, use fallocate command to punch holes

an incremental send preserves the hole and data structure of files, which can
be seen via calls to lseek with the whence parameter set to SEEK_DATA or 
SEEK_HOLE,
as the example below shows:

mkfs.btrfs -f /dev/sdc
mount /dev/sdc /mnt
xfs_io -f -c pwrite -S 0x01 -b 30 0 30 /mnt/foo
btrfs subvolume snapshot -r /mnt /mnt/mysnap1

xfs_io -c fpunch 10 5 /mnt/foo
xfs_io -c falloc 10 5 /mnt/foo
xfs_io -c pwrite -S 0xff -b 1000 12 1000 /mnt/foo
xfs_io -c fpunch 25 2 /mnt/foo

# prealloc extents that start beyond the inode's size
xfs_io -c falloc -k 30 100 /mnt/foo
xfs_io -c falloc -k 900 200 /mnt/foo

btrfs subvolume snapshot -r /mnt /mnt/mysnap2

btrfs send /mnt/mysnap1 -f /tmp/1.snap
btrfs send -p /mnt/mysnap1 /mnt/mysnap2 -f /tmp/2.snap

mkfs.btrfs -f /dev/sdd
mount /dev/sdd /mnt2
btrfs receive /mnt2 -f /tmp/1.snap
btrfs receive /mnt2 -f /tmp/2.snap

Before this change the hole/data structure differed between both filesystems:

$ xfs_io -r -c 'seek -r -a 0' /mnt/mysnap2/foo
Whence  Result
DATA0
HOLE102400
DATA118784
HOLE122880
DATA147456
HOLE253952
DATA266240
HOLE30

$ xfs_io -r -c 'seek -r -a 0' /mnt2/mysnap2/foo
Whence  Result
DATA0
HOLE30

After this change the second filesystem (/dev/sdd) ends up with the same 
hole/data
structure as the first filesystem.

Also, after this change, prealloc extents that lie beyond the inode's size (i.e.
those allocated with fallocate and the keep size flag) are also replicated by an
incremental send. For the above test, it can be observed via fiemap (or
btrfs-debug-tree):

$ xfs_io -r -c 'fiemap -l' /mnt2/mysnap2/foo
0: [0..191]: 25096..25287 192 blocks
1: [192..199]: 24672..24679 8 blocks
2: [200..231]: 24584..24615 32 blocks
3: [232..239]: 24680..24687 8 blocks
4: [240..287]: 24616..24663 48 blocks
5: [288..295]: 24688..24695 8 blocks
6: [296..487]: 25392..25583 192 blocks
7: [488..495]: 24696..24703 8 blocks
8: [496..519]: hole 24 blocks
9: [520..527]: 24704..24711 8 blocks
10: [528..583]: 25624..25679 56 blocks
11: [584..591]: 24712..24719 8 blocks
12: [592..2543]: 26192..28143 1952 blocks
13: [2544..17575]: hole 15032 blocks
14: [17576..21487]: 28144..32055 3912 blocks

The xfstests test that verifies that a send stream version 2 does space
pre-allocation and hole punching (btrfs/047) was already merged.

Signed-off-by: Filipe David Borba Manana fdman...@gmail.com
---

V2: Added new send ioctl flag BTRFS_SEND_FLAG_SUPPORT_FALLOCATE. A version 2
stream is now only produced if the ioctl caller specifies at least one of
the new send flags (BTRFS_SEND_FLAG_SUPPORT_FALLOCATE or
BTRFS_SEND_FLAG_CALCULATE_DATA_SIZE).
V3: Fixed rebase, removed some duplicate logic on truncate + falloc -k.
V4: Removed BTRFS_SEND_FLAG_SUPPORT_FALLOCATE and added 
BTRFS_SEND_FLAG_STREAM_V2,
added commands for inode set flags and otime.
V5: Rebased against latest chris/integration branch and updated commit message.

 fs/btrfs/send.c | 78 +
 1 file changed, 57 insertions(+), 21 deletions(-)

diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c
index 300eaee..873eeb1 100644
--- a/fs/btrfs/send.c
+++ b/fs/btrfs/send.c
@@ -113,9 +113,10 @@ struct send_ctx {
 */
u64 cur_ino;
u64 cur_inode_gen;
-   int cur_inode_new;
-   int cur_inode_new_gen;
-   int cur_inode_deleted;
+   u8 cur_inode_new:1;
+   u8 cur_inode_new_gen:1;
+   u8 cur_inode_skip_truncate:1;
+   u8 cur_inode_deleted:1;
u64 cur_inode_size;
u64 cur_inode_mode;
u64 cur_inode_rdev;
@@ -4580,6 +4581,19 @@ tlv_put_failure:
return ret;
 }
 
+static int truncate_before_falloc(struct send_ctx *sctx)
+{
+   int ret = 0;
+
+   if (!sctx-cur_inode_skip_truncate) {
+   ret = send_truncate(sctx, sctx-cur_ino,
+   sctx-cur_inode_gen,
+   sctx-cur_inode_size);
+   sctx-cur_inode_skip_truncate = 1;
+   }
+   return ret;
+}
+
 static int send_write_or_clone(struct send_ctx *sctx,
   struct btrfs_path *path,
   struct btrfs_key *key,
@@ -4619,8 +4633,7 @@ static int send_write_or_clone(struct send_ctx *sctx

[PATCH 1/6 v5] Btrfs: send, bump stream version

2014-06-23 Thread Filipe David Borba Manana
This increases the send stream version from version 1 to version 2, adding
new commands:

1) total data size - used to tell the receiver how much file data the stream
   will add or update;

2) fallocate - used to pre-allocate space for files and to punch holes in files;

3) inode set flags;

4) set inode otime.

This is preparation work for subsequent changes that implement the new features.

A version 2 stream is only produced if the send ioctl caller passes in one of 
the
new flags (BTRFS_SEND_FLAG_CALCULATE_DATA_SIZE | BTRFS_SEND_FLAG_STREAM_V2), 
meaning
old clients are unaffected.

Signed-off-by: Filipe David Borba Manana fdman...@gmail.com
---

V2: A v2 stream is now only produced if the send ioctl caller passes in one of
the new flags (BTRFS_SEND_FLAG_CALCULATE_DATA_SIZE | 
BTRFS_SEND_FLAG_SUPPORT_FALLOCATE)
to avoid breaking old clients.
V3: Removed BTRFS_SEND_FLAG_SUPPORT_FALLOCATE and added 
BTRFS_SEND_FLAG_STREAM_V2,
added commands for inode set flags and otime.
V4: There's no v4; the version was bumped directly to v5 so that all patches in
the series have the same version.
V5: Rebased against latest chris/integration branch.

 fs/btrfs/send.c|  7 ++-
 fs/btrfs/send.h| 21 -
 include/uapi/linux/btrfs.h | 21 -
 3 files changed, 46 insertions(+), 3 deletions(-)

diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c
index 6528aa6..d7ef14b 100644
--- a/fs/btrfs/send.c
+++ b/fs/btrfs/send.c
@@ -637,7 +637,10 @@ static int send_header(struct send_ctx *sctx)
struct btrfs_stream_header hdr;
 
strcpy(hdr.magic, BTRFS_SEND_STREAM_MAGIC);
-   hdr.version = cpu_to_le32(BTRFS_SEND_STREAM_VERSION);
+   if (sctx-flags  BTRFS_SEND_FLAG_STREAM_V2)
+   hdr.version = cpu_to_le32(BTRFS_SEND_STREAM_VERSION_2);
+   else
+   hdr.version = cpu_to_le32(BTRFS_SEND_STREAM_VERSION_1);
 
return write_buf(sctx-send_filp, hdr, sizeof(hdr),
sctx-send_off);
@@ -5572,6 +5575,8 @@ long btrfs_ioctl_send(struct file *mnt_file, void __user 
*arg_)
INIT_LIST_HEAD(sctx-name_cache_list);
 
sctx-flags = arg-flags;
+   if (sctx-flags  BTRFS_SEND_FLAG_CALCULATE_DATA_SIZE)
+   sctx-flags |= BTRFS_SEND_FLAG_STREAM_V2;
 
sctx-send_filp = fget(arg-send_fd);
if (!sctx-send_filp) {
diff --git a/fs/btrfs/send.h b/fs/btrfs/send.h
index 48d425a..96f583c 100644
--- a/fs/btrfs/send.h
+++ b/fs/btrfs/send.h
@@ -20,7 +20,8 @@
 #include ctree.h
 
 #define BTRFS_SEND_STREAM_MAGIC btrfs-stream
-#define BTRFS_SEND_STREAM_VERSION 1
+#define BTRFS_SEND_STREAM_VERSION_1 1
+#define BTRFS_SEND_STREAM_VERSION_2 2
 
 #define BTRFS_SEND_BUF_SIZE (1024 * 64)
 #define BTRFS_SEND_READ_SIZE (1024 * 48)
@@ -87,6 +88,15 @@ enum btrfs_send_cmd {
 
BTRFS_SEND_C_END,
BTRFS_SEND_C_UPDATE_EXTENT,
+
+   /*
+* The following commands were added in stream version 2.
+*/
+   BTRFS_SEND_C_TOTAL_DATA_SIZE,
+   BTRFS_SEND_C_FALLOCATE,
+   BTRFS_SEND_C_INODE_SET_FLAGS,
+   BTRFS_SEND_C_UTIMES2, /* Same as UTIMES, but it includes OTIME too. */
+
__BTRFS_SEND_C_MAX,
 };
 #define BTRFS_SEND_C_MAX (__BTRFS_SEND_C_MAX - 1)
@@ -125,10 +135,19 @@ enum {
BTRFS_SEND_A_CLONE_OFFSET,
BTRFS_SEND_A_CLONE_LEN,
 
+   /*
+* The following attributes were added in stream version 2.
+*/
+   BTRFS_SEND_A_FALLOCATE_FLAGS, /* 32 bits */
+   BTRFS_SEND_A_INODE_FLAGS, /* 32 bits */
+
__BTRFS_SEND_A_MAX,
 };
 #define BTRFS_SEND_A_MAX (__BTRFS_SEND_A_MAX - 1)
 
+#define BTRFS_SEND_A_FALLOCATE_FLAG_KEEP_SIZE   (1  0)
+#define BTRFS_SEND_A_FALLOCATE_FLAG_PUNCH_HOLE  (1  1)
+
 #ifdef __KERNEL__
 long btrfs_ioctl_send(struct file *mnt_file, void __user *arg);
 #endif
diff --git a/include/uapi/linux/btrfs.h b/include/uapi/linux/btrfs.h
index 6f9c38c..62440d8 100644
--- a/include/uapi/linux/btrfs.h
+++ b/include/uapi/linux/btrfs.h
@@ -477,10 +477,29 @@ struct btrfs_ioctl_received_subvol_args {
  */
 #define BTRFS_SEND_FLAG_OMIT_END_CMD   0x4
 
+/*
+ * Calculate the amount (in bytes) of new file data between the send and
+ * parent snapshots, or in case of a full send, the total amount of file data
+ * we will send.
+ * This corresponds to the sum of the data lengths of each write, clone and
+ * fallocate commands that are sent through the send stream. The receiving end
+ * can use this information to compute progress.
+ *
+ * Added in send stream version 2, and implies producing a version 2 stream.
+ */
+#define BTRFS_SEND_FLAG_CALCULATE_DATA_SIZE0x8
+
+/*
+ * Used by a client to request a version 2 of the send stream.
+ */
+#define BTRFS_SEND_FLAG_STREAM_V2  0x10
+
 #define BTRFS_SEND_FLAG_MASK \
(BTRFS_SEND_FLAG_NO_FILE_DATA | \
 BTRFS_SEND_FLAG_OMIT_STREAM_HEADER | \
-BTRFS_SEND_FLAG_OMIT_END_CMD)
+BTRFS_SEND_FLAG_OMIT_END_CMD

[PATCH 5/6 v5] Btrfs: add missing cleanup on sysfs init failure

2014-06-23 Thread Filipe David Borba Manana
If we failed during initialization of sysfs, we weren't unregistering the
top level btrfs sysfs entry nor the debugfs stuff.
Not unregistering the top level sysfs entry makes future attempts to reload
the btrfs module impossible and the following is reported in dmesg:

[ 2246.451296] WARNING: CPU: 3 PID: 10999 at fs/sysfs/dir.c:486 
sysfs_warn_dup+0x91/0xb0()
[ 2246.451298] sysfs: cannot create duplicate filename '/fs/btrfs'
[ 2246.451298] Modules linked in: btrfs(+) raid6_pq xor bnep rfcomm bluetooth 
binfmt_misc nfsd auth_rpcgss oid_registry nfs_acl nfs lockd fscache sunrpc 
parport_pc parport psmouse serio_raw pcspkr evbug i2c_piix4 e1000 floppy [last 
unloaded: btrfs]
[ 2246.451310] CPU: 3 PID: 10999 Comm: modprobe Tainted: GW
3.13.0-fdm-btrfs-next-24+ #7
[ 2246.451311] Hardware name: Bochs Bochs, BIOS Bochs 01/01/2011
[ 2246.451312]  0009 8800d353fa08 816f1da6 
0410
[ 2246.451314]  8800d353fa58 8800d353fa48 8104a32c 
88020821a290
[ 2246.451316]  88020821a290 88020821a290 8802148f 
8800d353fb80
[ 2246.451318] Call Trace:
[ 2246.451322]  [816f1da6] dump_stack+0x4e/0x68
[ 2246.451324]  [8104a32c] warn_slowpath_common+0x8c/0xc0
[ 2246.451325]  [8104a416] warn_slowpath_fmt+0x46/0x50
[ 2246.451328]  [81367dc5] ? strlcat+0x65/0x90
()

Signed-off-by: Filipe David Borba Manana fdman...@gmail.com
---

V1..V4: There's no v1, v2, v3 and v4. Bumped directly to v5 to make all patches
in the series have the same version.
V5: Rebased against latest chris/integration branch.

 fs/btrfs/sysfs.c | 10 +-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/fs/btrfs/sysfs.c b/fs/btrfs/sysfs.c
index df39458..06ad529 100644
--- a/fs/btrfs/sysfs.c
+++ b/fs/btrfs/sysfs.c
@@ -710,10 +710,18 @@ int btrfs_init_sysfs(void)
 
ret = btrfs_init_debugfs();
if (ret)
-   return ret;
+   goto out1;
 
init_feature_attrs();
ret = sysfs_create_group(btrfs_kset-kobj, btrfs_feature_attr_group);
+   if (ret)
+   goto out2;
+
+   return 0;
+out2:
+   debugfs_remove_recursive(btrfs_debugfs_root_dentry);
+out1:
+   kset_unregister(btrfs_kset);
 
return ret;
 }
-- 
1.9.1

--
To unsubscribe from this list: send the line unsubscribe linux-btrfs in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 6/6 v5] Btrfs: add send_stream_version attribute to sysfs

2014-06-23 Thread Filipe David Borba Manana
So that applications can find out what's the highest send stream
version supported/implemented by the running kernel:

$ cat /sys/fs/btrfs/send/stream_version
2

Signed-off-by: Filipe David Borba Manana fdman...@gmail.com
Reviewed-by: David Sterba dste...@suse.cz
---

V1..V4: There's no v1, v2, v3 and v4. Bumped directly to v5 to make all patches
in the series have the same version.
V5: Rebased against latest chris/integration branch.

 fs/btrfs/send.h  |  1 +
 fs/btrfs/sysfs.c | 27 +++
 2 files changed, 28 insertions(+)

diff --git a/fs/btrfs/send.h b/fs/btrfs/send.h
index 987936c..047fd6d 100644
--- a/fs/btrfs/send.h
+++ b/fs/btrfs/send.h
@@ -22,6 +22,7 @@
 #define BTRFS_SEND_STREAM_MAGIC btrfs-stream
 #define BTRFS_SEND_STREAM_VERSION_1 1
 #define BTRFS_SEND_STREAM_VERSION_2 2
+#define BTRFS_SEND_STREAM_VERSION_LATEST BTRFS_SEND_STREAM_VERSION_2
 
 #define BTRFS_SEND_BUF_SIZE (1024 * 64)
 #define BTRFS_SEND_READ_SIZE (1024 * 48)
diff --git a/fs/btrfs/sysfs.c b/fs/btrfs/sysfs.c
index 06ad529..9869d94 100644
--- a/fs/btrfs/sysfs.c
+++ b/fs/btrfs/sysfs.c
@@ -31,6 +31,7 @@
 #include transaction.h
 #include sysfs.h
 #include volumes.h
+#include send.h
 
 static inline struct btrfs_fs_info *to_fs_info(struct kobject *kobj);
 
@@ -700,6 +701,26 @@ static int btrfs_init_debugfs(void)
return 0;
 }
 
+static ssize_t send_stream_version_show(struct kobject *kobj,
+   struct kobj_attribute *a,
+   char *buf)
+{
+   return snprintf(buf, PAGE_SIZE, %d\n,
+   BTRFS_SEND_STREAM_VERSION_LATEST);
+}
+
+BTRFS_ATTR(stream_version, 0444, send_stream_version_show);
+
+static struct attribute *btrfs_send_attrs[] = {
+   BTRFS_ATTR_PTR(stream_version),
+   NULL
+};
+
+static const struct attribute_group btrfs_send_attr_group = {
+   .name = send,
+   .attrs = btrfs_send_attrs,
+};
+
 int btrfs_init_sysfs(void)
 {
int ret;
@@ -716,8 +737,13 @@ int btrfs_init_sysfs(void)
ret = sysfs_create_group(btrfs_kset-kobj, btrfs_feature_attr_group);
if (ret)
goto out2;
+   ret = sysfs_create_group(btrfs_kset-kobj, btrfs_send_attr_group);
+   if (ret)
+   goto out3;
 
return 0;
+out3:
+   sysfs_remove_group(btrfs_kset-kobj, btrfs_feature_attr_group);
 out2:
debugfs_remove_recursive(btrfs_debugfs_root_dentry);
 out1:
@@ -729,6 +755,7 @@ out1:
 void btrfs_exit_sysfs(void)
 {
sysfs_remove_group(btrfs_kset-kobj, btrfs_feature_attr_group);
+   sysfs_remove_group(btrfs_kset-kobj, btrfs_send_attr_group);
kset_unregister(btrfs_kset);
debugfs_remove_recursive(btrfs_debugfs_root_dentry);
 }
-- 
1.9.1

--
To unsubscribe from this list: send the line unsubscribe linux-btrfs in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH] Btrfs: fix crash when starting transaction

2014-06-23 Thread Filipe David Borba Manana
Often when starting a transaction we commit the currently running transaction,
which can end up writing block group caches when the current process has its
journal_info set to NULL (and not to a transaction). This makes our assertion
at btrfs_check_data_free_space() (current_journal != NULL) fail, resulting
in a crash/hang. Therefore fix it by setting journal_info.

Two different traces of this issue follow below.

1)

[51502.241936] BTRFS: assertion failed: current-journal_info, file: 
fs/btrfs/extent-tree.c, line: 3670
[51502.242213] [ cut here ]
[51502.242493] kernel BUG at fs/btrfs/ctree.h:3964!
[51502.242669] invalid opcode:  [#1] SMP DEBUG_PAGEALLOC
(...)
[51502.244010] Call Trace:
[51502.244010]  [a02bc025] 
btrfs_check_data_free_space+0x395/0x3a0 [btrfs]
[51502.244010]  [a02c3bdc] 
btrfs_write_dirty_block_groups+0x4ac/0x640 [btrfs]
[51502.244010]  [a0357a6a] commit_cowonly_roots+0x164/0x226 
[btrfs]
[51502.244010]  [a02d53cd] btrfs_commit_transaction+0x4ed/0xab0 
[btrfs]
[51502.244010]  [8168ec7b] ? _raw_spin_unlock+0x2b/0x40
[51502.244010]  [a02d6259] start_transaction+0x459/0x620 [btrfs]
[51502.244010]  [a02d67ab] btrfs_start_transaction+0x1b/0x20 
[btrfs]
[51502.244010]  [a02d73e1] __unlink_start_trans+0x31/0xe0 [btrfs]
[51502.244010]  [a02dea67] btrfs_unlink+0x37/0xc0 [btrfs]
[51502.244010]  [811bb054] ? do_unlinkat+0x114/0x2a0
[51502.244010]  [811baebc] vfs_unlink+0xcc/0x150
[51502.244010]  [811bb1a0] do_unlinkat+0x260/0x2a0
[51502.244010]  [811a9ef4] ? filp_close+0x64/0x90
[51502.244010]  [810aaea6] ? trace_hardirqs_on_caller+0x16/0x1e0
[51502.244010]  [81349cab] ? trace_hardirqs_on_thunk+0x3a/0x3f
[51502.244010]  [811be9eb] SyS_unlinkat+0x1b/0x40
[51502.244010]  [81698452] system_call_fastpath+0x16/0x1b
[51502.244010] Code: 0b 55 48 89 e5 0f 0b 55 48 89 e5 0f 0b 55 89 f1 48 c7 
c2 71 13 36 a0 48 89 fe 31 c0 48 c7 c7 b8 43 36 a0 48 89 e5 e8 5d b0 32 e1 0f 
0b 0f 1f 44 00 00 55 b9 11 00 00 00 48 89 e5 41 55 49 89 f5
[51502.244010] RIP  [a03575da] assfail.constprop.88+0x1e/0x20 
[btrfs]

2)

[25405.097230] BTRFS: assertion failed: current-journal_info, file: 
fs/btrfs/extent-tree.c, line: 3670
[25405.097488] [ cut here ]
[25405.097767] kernel BUG at fs/btrfs/ctree.h:3964!
[25405.097940] invalid opcode:  [#1] SMP DEBUG_PAGEALLOC
(...)
[25405.18] Call Trace:
[25405.18]  [a02bc025] 
btrfs_check_data_free_space+0x395/0x3a0 [btrfs]
[25405.18]  [a02c3bdc] 
btrfs_write_dirty_block_groups+0x4ac/0x640 [btrfs]
[25405.18]  [a035755a] commit_cowonly_roots+0x164/0x226 
[btrfs]
[25405.18]  [a02d53cd] btrfs_commit_transaction+0x4ed/0xab0 
[btrfs]
[25405.18]  [8109c170] ? bit_waitqueue+0xc0/0xc0
[25405.18]  [a02d6259] start_transaction+0x459/0x620 [btrfs]
[25405.18]  [a02d67ab] btrfs_start_transaction+0x1b/0x20 
[btrfs]
[25405.18]  [a02e3407] btrfs_create+0x47/0x210 [btrfs]
[25405.18]  [a02d74cc] ? btrfs_permission+0x3c/0x80 [btrfs]
[25405.18]  [811bc63b] vfs_create+0x9b/0x130
[25405.18]  [811bcf19] do_last+0x849/0xe20
[25405.18]  [811b9409] ? link_path_walk+0x79/0x820
[25405.18]  [811bd5b5] path_openat+0xc5/0x690
[25405.18]  [810ab07d] ? trace_hardirqs_on+0xd/0x10
[25405.18]  [811cdcd2] ? __alloc_fd+0x32/0x1d0
[25405.18]  [811be2a3] do_filp_open+0x43/0xa0
[25405.18]  [811cddf1] ? __alloc_fd+0x151/0x1d0
[25405.18]  [811abcfc] do_sys_open+0x13c/0x230
[25405.18]  [810aaea6] ? trace_hardirqs_on_caller+0x16/0x1e0
[25405.18]  [811abe12] SyS_open+0x22/0x30
[25405.18]  [81698452] system_call_fastpath+0x16/0x1b
[25405.18] Code: 0b 55 48 89 e5 0f 0b 55 48 89 e5 0f 0b 55 89 f1 48 c7 
c2 51 13 36 a0 48 89 fe 31 c0 48 c7 c7 d0 43 36 a0 48 89 e5 e8 6d b5 32 e1 0f 
0b 0f 1f 44 00 00 55 b9 11 00 00 00 48 89 e5 41 55 49 89 f5
[25405.18] RIP  [a03570ca] assfail.constprop.88+0x1e/0x20 
[btrfs]

Signed-off-by: Filipe David Borba Manana fdman...@gmail.com
---
 fs/btrfs/transaction.c | 4 
 1 file changed, 4 insertions(+)

diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index ac984a3..fe4abe9 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -491,7 +491,11 @@ again:
smp_mb();
if (cur_trans-state = TRANS_STATE_BLOCKED 
may_wait_transaction(root, type)) {
+   void *journal_info = current-journal_info;
+   if (!journal_info)
+   current-journal_info = h

[PATCH] Btrfs: remove unused wait queue in struct extent_buffer

2014-06-16 Thread Filipe David Borba Manana
The lock_wq wait queue is not used anywhere, therefore just remove it.
On an x86_64 system, this reduced sizeof(struct extent_buffer) from 320
bytes down to 296 bytes, which means a 4KB page can now be used for
13 extent buffers instead of 12.

Signed-off-by: Filipe David Borba Manana fdman...@gmail.com
---
 fs/btrfs/extent_io.h | 1 -
 1 file changed, 1 deletion(-)

diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h
index 8b63f2d..dbbea4f 100644
--- a/fs/btrfs/extent_io.h
+++ b/fs/btrfs/extent_io.h
@@ -158,7 +158,6 @@ struct extent_buffer {
 * to unlock
 */
wait_queue_head_t read_lock_wq;
-   wait_queue_head_t lock_wq;
struct page *pages[INLINE_EXTENT_BUFFER_PAGES];
 #ifdef CONFIG_BTRFS_DEBUG
struct list_head leak_list;
-- 
1.9.1

--
To unsubscribe from this list: send the line unsubscribe linux-btrfs in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH] Btrfs: fix qgroups sanity test crash or hang

2014-06-11 Thread Filipe David Borba Manana
Often when running the qgroups sanity test, a crash or a hang happened.
This is because the extent buffer the test uses for the root node doesn't
have a header level explicitly set, leaving it with a random level value.
This is a problem when it's not zero for the btrfs_search_slot() calls
the test ends up doing, resulting in crashes or hangs such as the following:

[ 6454.127192] Btrfs loaded, debug=on, assert=on, integrity-checker=on
(...)
[ 6454.127760] BTRFS: selftest: Running qgroup tests
[ 6454.127964] BTRFS: selftest: Running test_test_no_shared_qgroup
[ 6454.127966] BTRFS: selftest: Qgroup basic add
[ 6480.152005] BUG: soft lockup - CPU#0 stuck for 23s! [modprobe:5383]
[ 6480.152005] Modules linked in: btrfs(+) xor raid6_pq binfmt_misc nfsd 
auth_rpcgss oid_registry nfs_acl nfs lockd fscache sunrpc i2c_piix4 i2c_core 
pcspkr evbug psmouse serio_raw e1000 [last unloaded: btrfs]
[ 6480.152005] irq event stamp: 188448
[ 6480.152005] hardirqs last  enabled at (188447): [8168ef5c] 
restore_args+0x0/0x30
[ 6480.152005] hardirqs last disabled at (188448): [81698e6a] 
apic_timer_interrupt+0x6a/0x80
[ 6480.152005] softirqs last  enabled at (188446): [810516cf] 
__do_softirq+0x1cf/0x450
[ 6480.152005] softirqs last disabled at (188441): [81051c25] 
irq_exit+0xb5/0xc0
[ 6480.152005] CPU: 0 PID: 5383 Comm: modprobe Not tainted 
3.15.0-rc8-fdm-btrfs-next-33+ #4
[ 6480.152005] Hardware name: Bochs Bochs, BIOS Bochs 01/01/2011
[ 6480.152005] task: 8802146125a0 ti: 8800d0d0 task.ti: 
8800d0d0
[ 6480.152005] RIP: 0010:[81349a63]  [81349a63] 
__write_lock_failed+0x13/0x20
[ 6480.152005] RSP: 0018:8800d0d038e8  EFLAGS: 0287
[ 6480.152005] RAX:  RBX: 8168ef5c RCX: 05deb8525852
[ 6480.152005] RDX:  RSI: 1d45 RDI: 8802105000b8
[ 6480.152005] RBP: 8800d0d038e8 R08: fe12710f63db R09: a03196fb
[ 6480.152005] R10: 8802146125a0 R11: 880214612e28 R12: 8800d0d03858
[ 6480.152005] R13:  R14: 8800d0d0 R15: 8802146125a0
[ 6480.152005] FS:  7f14ff804700() GS:880215e0() 
knlGS:
[ 6480.152005] CS:  0010 DS:  ES:  CR0: 8005003b
[ 6480.152005] CR2: 7fff4df0dac8 CR3: d1796000 CR4: 06f0
[ 6480.152005] Stack:
[ 6480.152005]  8800d0d03908 810ae967 0001 
8802105000b8
[ 6480.152005]  8800d0d03938 8168e57e a0319c16 
0007
[ 6480.152005]  88021050 880210500100 8800d0d039b8 
a0319c16
[ 6480.152005] Call Trace:
[ 6480.152005]  [810ae967] do_raw_write_lock+0x47/0xa0
[ 6480.152005]  [8168e57e] _raw_write_lock+0x5e/0x80
[ 6480.152005]  [a0319c16] ? btrfs_tree_lock+0x116/0x270 [btrfs]
[ 6480.152005]  [a0319c16] btrfs_tree_lock+0x116/0x270 [btrfs]
[ 6480.152005]  [a02b2acb] btrfs_lock_root_node+0x3b/0x50 [btrfs]
[ 6480.152005]  [a02b81a6] btrfs_search_slot+0x916/0xa20 [btrfs]
[ 6480.152005]  [811a727f] ? create_object+0x23f/0x300
[ 6480.152005]  [a02b9958] btrfs_insert_empty_items+0x78/0xd0 [btrfs]
[ 6480.152005]  [a036041a] 
insert_normal_tree_ref.constprop.4+0xa2/0x19a [btrfs]
[ 6480.152005]  [a03605c3] test_no_shared_qgroup+0xb1/0x1ca [btrfs]
[ 6480.152005]  [8108cad6] ? local_clock+0x16/0x30
[ 6480.152005]  [a035ef8e] btrfs_test_qgroups+0x1ae/0x1d7 [btrfs]
[ 6480.152005]  [a03a69d2] ? 
ftrace_define_fields_btrfs_space_reservation+0xfd/0xfd [btrfs]
[ 6480.152005]  [a03a6a86] init_btrfs_fs+0xb4/0x153 [btrfs]
[ 6480.152005]  [81000352] do_one_initcall+0x102/0x150
[ 6480.152005]  [8103d223] ? set_memory_nx+0x43/0x50
[ 6480.152005]  [81682668] ? set_section_ro_nx+0x6d/0x74
[ 6480.152005]  [810d91cc] load_module+0x1cdc/0x2630
(...)

Therefore initialize the extent buffer as an empty leaf (level 0).

The issue is easy to reproduce when btrfs is built as a module, via:

$ for ((i = 1; i <= 100; i++)); do rmmod btrfs; modprobe btrfs; done

Signed-off-by: Filipe David Borba Manana fdman...@gmail.com
---
 fs/btrfs/tests/qgroup-tests.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/fs/btrfs/tests/qgroup-tests.c b/fs/btrfs/tests/qgroup-tests.c
index fa691b7..0e69c8e 100644
--- a/fs/btrfs/tests/qgroup-tests.c
+++ b/fs/btrfs/tests/qgroup-tests.c
@@ -410,6 +410,8 @@ int btrfs_test_qgroups(void)
 * *cough*backref walking code*cough*
 */
root-node = alloc_test_extent_buffer(root-fs_info, 4096, 4096);
+   btrfs_set_header_level(root-node, 0);
+   btrfs_set_header_nritems(root-node, 0);
if (!root-node) {
test_msg(Couldn't allocate dummy buffer\n);
ret = -ENOMEM;
-- 
1.9.1

--
To unsubscribe from this list: send the line unsubscribe linux-btrfs in
the body of a message to majord...@vger.kernel.org
More

[PATCH v2] Btrfs: fix qgroups sanity test crash or hang

2014-06-11 Thread Filipe David Borba Manana
Often when running the qgroups sanity test, a crash or a hang happened.
This is because the extent buffer the test uses for the root node doesn't
have a header level explicitly set, leaving it with a random level value.
This is a problem when it's not zero for the btrfs_search_slot() calls
the test ends up doing, resulting in crashes or hangs such as the following:

[ 6454.127192] Btrfs loaded, debug=on, assert=on, integrity-checker=on
(...)
[ 6454.127760] BTRFS: selftest: Running qgroup tests
[ 6454.127964] BTRFS: selftest: Running test_test_no_shared_qgroup
[ 6454.127966] BTRFS: selftest: Qgroup basic add
[ 6480.152005] BUG: soft lockup - CPU#0 stuck for 23s! [modprobe:5383]
[ 6480.152005] Modules linked in: btrfs(+) xor raid6_pq binfmt_misc nfsd 
auth_rpcgss oid_registry nfs_acl nfs lockd fscache sunrpc i2c_piix4 i2c_core 
pcspkr evbug psmouse serio_raw e1000 [last unloaded: btrfs]
[ 6480.152005] irq event stamp: 188448
[ 6480.152005] hardirqs last  enabled at (188447): [8168ef5c] 
restore_args+0x0/0x30
[ 6480.152005] hardirqs last disabled at (188448): [81698e6a] 
apic_timer_interrupt+0x6a/0x80
[ 6480.152005] softirqs last  enabled at (188446): [810516cf] 
__do_softirq+0x1cf/0x450
[ 6480.152005] softirqs last disabled at (188441): [81051c25] 
irq_exit+0xb5/0xc0
[ 6480.152005] CPU: 0 PID: 5383 Comm: modprobe Not tainted 
3.15.0-rc8-fdm-btrfs-next-33+ #4
[ 6480.152005] Hardware name: Bochs Bochs, BIOS Bochs 01/01/2011
[ 6480.152005] task: 8802146125a0 ti: 8800d0d0 task.ti: 
8800d0d0
[ 6480.152005] RIP: 0010:[81349a63]  [81349a63] 
__write_lock_failed+0x13/0x20
[ 6480.152005] RSP: 0018:8800d0d038e8  EFLAGS: 0287
[ 6480.152005] RAX:  RBX: 8168ef5c RCX: 05deb8525852
[ 6480.152005] RDX:  RSI: 1d45 RDI: 8802105000b8
[ 6480.152005] RBP: 8800d0d038e8 R08: fe12710f63db R09: a03196fb
[ 6480.152005] R10: 8802146125a0 R11: 880214612e28 R12: 8800d0d03858
[ 6480.152005] R13:  R14: 8800d0d0 R15: 8802146125a0
[ 6480.152005] FS:  7f14ff804700() GS:880215e0() 
knlGS:
[ 6480.152005] CS:  0010 DS:  ES:  CR0: 8005003b
[ 6480.152005] CR2: 7fff4df0dac8 CR3: d1796000 CR4: 06f0
[ 6480.152005] Stack:
[ 6480.152005]  8800d0d03908 810ae967 0001 
8802105000b8
[ 6480.152005]  8800d0d03938 8168e57e a0319c16 
0007
[ 6480.152005]  88021050 880210500100 8800d0d039b8 
a0319c16
[ 6480.152005] Call Trace:
[ 6480.152005]  [810ae967] do_raw_write_lock+0x47/0xa0
[ 6480.152005]  [8168e57e] _raw_write_lock+0x5e/0x80
[ 6480.152005]  [a0319c16] ? btrfs_tree_lock+0x116/0x270 [btrfs]
[ 6480.152005]  [a0319c16] btrfs_tree_lock+0x116/0x270 [btrfs]
[ 6480.152005]  [a02b2acb] btrfs_lock_root_node+0x3b/0x50 [btrfs]
[ 6480.152005]  [a02b81a6] btrfs_search_slot+0x916/0xa20 [btrfs]
[ 6480.152005]  [811a727f] ? create_object+0x23f/0x300
[ 6480.152005]  [a02b9958] btrfs_insert_empty_items+0x78/0xd0 [btrfs]
[ 6480.152005]  [a036041a] 
insert_normal_tree_ref.constprop.4+0xa2/0x19a [btrfs]
[ 6480.152005]  [a03605c3] test_no_shared_qgroup+0xb1/0x1ca [btrfs]
[ 6480.152005]  [8108cad6] ? local_clock+0x16/0x30
[ 6480.152005]  [a035ef8e] btrfs_test_qgroups+0x1ae/0x1d7 [btrfs]
[ 6480.152005]  [a03a69d2] ? 
ftrace_define_fields_btrfs_space_reservation+0xfd/0xfd [btrfs]
[ 6480.152005]  [a03a6a86] init_btrfs_fs+0xb4/0x153 [btrfs]
[ 6480.152005]  [81000352] do_one_initcall+0x102/0x150
[ 6480.152005]  [8103d223] ? set_memory_nx+0x43/0x50
[ 6480.152005]  [81682668] ? set_section_ro_nx+0x6d/0x74
[ 6480.152005]  [810d91cc] load_module+0x1cdc/0x2630
(...)

Therefore initialize the extent buffer as an empty leaf (level 0).

The issue is easy to reproduce when btrfs is built as a module, via:

$ for ((i = 1; i <= 100; i++)); do rmmod btrfs; modprobe btrfs; done

Signed-off-by: Filipe David Borba Manana fdman...@gmail.com
---

V2: Fixed silly mistake. Set root->node's header level and nritems after
checking that root->node is not NULL.

 fs/btrfs/tests/qgroup-tests.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/fs/btrfs/tests/qgroup-tests.c b/fs/btrfs/tests/qgroup-tests.c
index fa691b7..ec3dcb2 100644
--- a/fs/btrfs/tests/qgroup-tests.c
+++ b/fs/btrfs/tests/qgroup-tests.c
@@ -415,6 +415,8 @@ int btrfs_test_qgroups(void)
ret = -ENOMEM;
goto out;
}
+   btrfs_set_header_level(root-node, 0);
+   btrfs_set_header_nritems(root-node, 0);
root-alloc_bytenr += 8192;
 
tmp_root = btrfs_alloc_dummy_root();
-- 
1.9.1

--
To unsubscribe from this list: send the line unsubscribe linux-btrfs in
the body of a message to majord

[PATCH v6] xfstests: add test for btrfs cloning with file holes

2014-06-10 Thread Filipe David Borba Manana
Regression test for the btrfs ioctl clone operation when the source range
contains hole(s) and the FS has the NO_HOLES feature enabled (file holes
don't need file extent items in the btree to represent them).

This issue is fixed by the following linux kernel btrfs patch:

Btrfs: fix clone to deal with holes when NO_HOLES feature is enabled

Signed-off-by: Filipe David Borba Manana fdman...@gmail.com
---

V2: Increased test coverage by testing the cases where a hole overlaps
the start and end of the cloning range.

V3: Test the case where the cloning range includes a hole at the end
of the source file and might increase the size of the target file.

V4: Added test for the case where the clone range covers only a hole at
the beginning of the source file.
Made the test be skipped if the available version of mkfs.btrfs
doesn't support the no-holes feature. And when testing the case
where the no-holes feature isn't enabled, explicitly ask mkfs.btrfs
to disable no-holes (future versions of mkfs.btrfs might enable
this feature by default).

V5: Detect if kernel supports NO_HOLES feature too. Added some messages
(echoes) before each od call to make it easier to match output
with each specific test.

V6: Pass -s to xfs_io when creating the test files.

 common/rc   |  25 
 tests/btrfs/055 | 165 +
 tests/btrfs/055.out | 347 
 tests/btrfs/group   |   1 +
 4 files changed, 538 insertions(+)
 create mode 100755 tests/btrfs/055
 create mode 100644 tests/btrfs/055.out

diff --git a/common/rc b/common/rc
index f27ee53..e2136d0 100644
--- a/common/rc
+++ b/common/rc
@@ -2177,6 +2177,31 @@ _require_btrfs_send_stream_version()
fi
 }
 
+_require_btrfs_mkfs_feature()
+{
+   if [ -z $1 ]; then
+   echo Missing feature name argument for 
_require_btrfs_mkfs_feature
+   exit 1
+   fi
+   feat=$1
+   $MKFS_BTRFS_PROG -O list-all 21 | \
+   grep '^[ \t]*'$feat'\b'  /dev/null 21
+   [ $? -eq 0 ] || \
+   _notrun Feature $feat not supported in the available version 
of mkfs.btrfs
+}
+
+_require_btrfs_fs_feature()
+{
+   if [ -z $1 ]; then
+   echo Missing feature name argument for 
_require_btrfs_fs_feature
+   exit 1
+   fi
+   feat=$1
+   modprobe btrfs  /dev/null 21
+   [ -e /sys/fs/btrfs/features/$feat ] || \
+   _notrun Feature $feat not supported by the available btrfs 
version
+}
+
 init_rc()
 {
if [ $iam == new ]
diff --git a/tests/btrfs/055 b/tests/btrfs/055
new file mode 100755
index 000..10c6040
--- /dev/null
+++ b/tests/btrfs/055
@@ -0,0 +1,165 @@
+#! /bin/bash
+# FS QA Test No. btrfs/055
+#
+# Regression test for the btrfs ioctl clone operation when the source range
+# contains hole(s) and the FS has the NO_HOLES feature enabled (file holes
+# don't need file extent items in the btree to represent them).
+#
+# This issue is fixed by the following linux kernel btrfs patch:
+#
+#Btrfs: fix clone to deal with holes when NO_HOLES feature is enabled
+#
+#---
+# Copyright (c) 2014 Filipe Manana.  All Rights Reserved.
+#
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License as
+# published by the Free Software Foundation.
+#
+# This program is distributed in the hope that it would be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write the Free Software Foundation,
+# Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+#---
+#
+
+seq=`basename $0`
+seqres=$RESULT_DIR/$seq
+echo QA output created by $seq
+
+tmp=/tmp/$$
+status=1   # failure is the default!
+trap _cleanup; exit \$status 0 1 2 3 15
+
+_cleanup()
+{
+rm -fr $tmp
+}
+
+# get standard environment, filters and checks
+. ./common/rc
+. ./common/filter
+
+# real QA test starts here
+_supported_fs btrfs
+_supported_os Linux
+_require_scratch
+_require_btrfs_cloner
+_require_btrfs_fs_feature no_holes
+_require_btrfs_mkfs_feature no-holes
+_need_to_be_root
+
+rm -f $seqres.full
+
+test_btrfs_clone_with_holes()
+{
+   _scratch_mkfs $1 /dev/null 21
+   _scratch_mount
+
+   # Create a file with 4 extents and 1 hole, all with a size of 8Kb each.
+   # The hole is in the range [16384, 24576[.
+   $XFS_IO_PROG -s -f -c pwrite -S 0x01 -b 8192 0 8192 \
+   -c pwrite -S 0x02 -b 8192 8192 8192 \
+   -c pwrite -S 0x04 -b 8192 24576 8192 \
+   -c pwrite

[PATCH v3] xfstests: add test for btrfs clone + fsync durability

2014-06-10 Thread Filipe David Borba Manana
Regression test for btrfs ioctl clone operation + fsync + log
recovery. The issue was that doing an fsync after cloning into
a file didn't give any persistence guarantees, as it should.
What happened was that the in memory metadata (extent maps)
weren't updated, which made the fsync code not able to detect
that file data has been changed and must be persisted to the
log.

This issue is fixed by the following linux kernel btrfs patch:

Btrfs: make fsync work after cloning into a file

Signed-off-by: Filipe David Borba Manana fdman...@gmail.com
---

V2: Test small files too, consisting of a single inline extent, as
it triggers different code paths.

V3: Pass -s to xfs_io when creating test files.

 tests/btrfs/056 | 145 
 tests/btrfs/056.out | 129 ++
 tests/btrfs/group   |   1 +
 3 files changed, 275 insertions(+)
 create mode 100755 tests/btrfs/056
 create mode 100644 tests/btrfs/056.out

diff --git a/tests/btrfs/056 b/tests/btrfs/056
new file mode 100755
index 000..9ecfeb8
--- /dev/null
+++ b/tests/btrfs/056
@@ -0,0 +1,145 @@
+#! /bin/bash
+# FS QA Test No. btrfs/056
+#
+# Regression test for btrfs ioctl clone operation + fsync + log recovery.
+# The issue was that doing an fsync after cloning into a file didn't gave any
+# persistence guarantees as it should. What happened was that the in memory
+# metadata (extent maps) weren't updated, which made the fsync code not able
+# to detect that file data has been changed.
+#
+# This issue is fixed by the following linux kernel btrfs patch:
+#
+#Btrfs: make fsync work after cloning into a file
+#
+#---
+# Copyright (c) 2014 Filipe Manana.  All Rights Reserved.
+#
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License as
+# published by the Free Software Foundation.
+#
+# This program is distributed in the hope that it would be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write the Free Software Foundation,
+# Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+#---
+#
+
+seq=`basename $0`
+seqres=$RESULT_DIR/$seq
+echo QA output created by $seq
+
+tmp=/tmp/$$
+status=1   # failure is the default!
+trap _cleanup; exit \$status 0 1 2 3 15
+
+_cleanup()
+{
+   _cleanup_flakey
+   rm -fr $tmp
+}
+
+# get standard environment, filters and checks
+. ./common/rc
+. ./common/filter
+. ./common/dmflakey
+
+# real QA test starts here
+_supported_fs btrfs
+_supported_os Linux
+_require_scratch
+_require_btrfs_cloner
+_require_btrfs_fs_feature no_holes
+_require_btrfs_mkfs_feature no-holes
+_require_dm_flakey
+_need_to_be_root
+
+rm -f $seqres.full
+
+test_btrfs_clone_fsync_log_recover()
+{
+   _scratch_mkfs $1 /dev/null 21
+   _init_flakey
+   SAVE_MOUNT_OPTIONS=$MOUNT_OPTIONS
+   MOUNT_OPTIONS=$MOUNT_OPTIONS $2
+   _mount_flakey
+
+   # Create a file with 4 extents and 1 hole, all with a size of 8Kb each.
+   # The hole is in the range [16384, 24576[.
+   $XFS_IO_PROG -s -f -c pwrite -S 0x01 -b 8192 0 8192 \
+   -c pwrite -S 0x02 -b 8192 8192 8192 \
+   -c pwrite -S 0x04 -b 8192 24576 8192 \
+   -c pwrite -S 0x05 -b 8192 32768 8192 \
+   $SCRATCH_MNT/foo | _filter_xfs_io
+
+   # Clone destination file, 1 extent of 96kb.
+   $XFS_IO_PROG -f -c pwrite -S 0xff -b 98304 0 98304 -c fsync \
+   $SCRATCH_MNT/bar | _filter_xfs_io
+
+   # Clone second half of the 2nd extent, the 8kb hole, the 3rd extent
+   # and the first half of the 4th extent into file bar.
+   $CLONER_PROG -s 12288 -d 0 -l 24576 $SCRATCH_MNT/foo $SCRATCH_MNT/bar
+   $XFS_IO_PROG -c fsync $SCRATCH_MNT/bar
+
+   # Test small files too consisting of 1 inline extent
+   $XFS_IO_PROG -f -c pwrite -S 0x00 -b 3500 0 3500 -c fsync \
+   $SCRATCH_MNT/foo2 | _filter_xfs_io
+
+   $XFS_IO_PROG -f -c pwrite -S 0xcc -b 1000 0 1000 -c fsync \
+   $SCRATCH_MNT/bar2 | _filter_xfs_io
+
+   # Clone the entire foo2 file into bar2, overwriting all data in bar2
+   # and increasing its size.
+   $CLONER_PROG -s 0 -d 0 -l 3500 $SCRATCH_MNT/foo2 $SCRATCH_MNT/bar2
+   $XFS_IO_PROG -c fsync $SCRATCH_MNT/bar2
+
+   _load_flakey_table $FLAKEY_DROP_WRITES
+   _unmount_flakey
+
+   # Verify that there are no consistency errors.
+   _check_scratch_fs $FLAKEY_DEV
+
+   _load_flakey_table $FLAKEY_ALLOW_WRITES
+   _mount_flakey

[PATCH] Btrfs: fix RCU correctness warning when running sanity tests

2014-06-10 Thread Filipe David Borba Manana
When CONFIG_PROVE_RCU=y and CONFIG_PROVE_RCU_REPEATEDLY=y, the
following was dumped in dmesg:

[ 3197.218064] ===
[ 3197.218064] [ INFO: suspicious RCU usage. ]
[ 3197.218066] 3.15.0-rc8-fdm-btrfs-next-33+ #4 Not tainted
[ 3197.218067] ---
[ 3197.218068] include/linux/radix-tree.h:196 suspicious 
rcu_dereference_check() usage!
[ 3197.218068]
[ 3197.218068] other info that might help us debug this:
[ 3197.218068]
[ 3197.218070]
[ 3197.218070] rcu_scheduler_active = 1, debug_locks = 1
[ 3197.218071] 1 lock held by modprobe/12024:
[ 3197.218072]  #0:  ((fs_info-buffer_lock)-rlock){+.+...}, at: 
[a025c5fa] btrfs_free_dummy_root+0x5a/0x1d0 [btrfs]
[ 3197.218093]
[ 3197.218093] stack backtrace:
[ 3197.218095] CPU: 3 PID: 12024 Comm: modprobe Not tainted 
3.15.0-rc8-fdm-btrfs-next-33+ #4
[ 3197.218096] Hardware name: Bochs Bochs, BIOS Bochs 01/01/2011
[ 3197.218097]  0001 8800af18fc18 81685c5a 
feb0
[ 3197.218099]  8800cf6ccb40 8800af18fc48 810a6316 
8801d955f640
[ 3197.218101]  8800d719e328 8800d719e370 8800d719c000 
8800af18fcb8
[ 3197.218102] Call Trace:
[ 3197.218105]  [81685c5a] dump_stack+0x4e/0x68
[ 3197.218108]  [810a6316] lockdep_rcu_suspicious+0xe6/0x130
[ 3197.218119]  [a025c728] btrfs_free_dummy_root+0x188/0x1d0 [btrfs]
[ 3197.218129]  [a025f56a] btrfs_test_qgroups+0xea/0x1bb [btrfs]
[ 3197.218137]  [a03a19d2] ? 
ftrace_define_fields_btrfs_space_reservation+0xfd/0xfd [btrfs]
[ 3197.218144]  [a03a19d2] ? 
ftrace_define_fields_btrfs_space_reservation+0xfd/0xfd [btrfs]
[ 3197.218151]  [a03a1ab7] init_btrfs_fs+0xe5/0x184 [btrfs]
[ 3197.218154]  [81000352] do_one_initcall+0x102/0x150
[ 3197.218157]  [8103d223] ? set_memory_nx+0x43/0x50
[ 3197.218160]  [81682668] ? set_section_ro_nx+0x6d/0x74
[ 3197.218162]  [810d91cc] load_module+0x1cdc/0x2630
[ 3197.218164]  [810d4e90] ? show_initstate+0x60/0x60
[ 3197.218166]  [810d9c9e] SyS_finit_module+0x8e/0x90
[ 3197.218168]  [81698212] system_call_fastpath+0x16/0x1b

Signed-off-by: Filipe David Borba Manana fdman...@gmail.com
---
 fs/btrfs/tests/btrfs-tests.c | 8 
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/fs/btrfs/tests/btrfs-tests.c b/fs/btrfs/tests/btrfs-tests.c
index a5dcacb..bbbfec9 100644
--- a/fs/btrfs/tests/btrfs-tests.c
+++ b/fs/btrfs/tests/btrfs-tests.c
@@ -130,8 +130,8 @@ static void btrfs_free_dummy_fs_info(struct btrfs_fs_info 
*fs_info)
struct radix_tree_iter iter;
void **slot;
 
-   spin_lock(fs_info-buffer_lock);
 restart:
+   rcu_read_lock();
radix_tree_for_each_slot(slot, fs_info-buffer_radix, iter, 0) {
struct extent_buffer *eb;
 
@@ -144,11 +144,11 @@ restart:
goto restart;
continue;
}
-   spin_unlock(fs_info-buffer_lock);
+   rcu_read_unlock();
free_extent_buffer_stale(eb);
-   spin_lock(fs_info-buffer_lock);
+   goto restart;
}
-   spin_unlock(fs_info-buffer_lock);
+   rcu_read_unlock();
 
btrfs_free_qgroup_config(fs_info);
btrfs_free_fs_roots(fs_info);
-- 
1.9.1

--
To unsubscribe from this list: send the line unsubscribe linux-btrfs in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v2] Btrfs: fix RCU correctness warning when running sanity tests

2014-06-10 Thread Filipe David Borba Manana
When CONFIG_PROVE_RCU=y and CONFIG_PROVE_RCU_REPEATEDLY=y, the
following was dumped in dmesg:

[ 3197.218064] ===
[ 3197.218064] [ INFO: suspicious RCU usage. ]
[ 3197.218066] 3.15.0-rc8-fdm-btrfs-next-33+ #4 Not tainted
[ 3197.218067] ---
[ 3197.218068] include/linux/radix-tree.h:196 suspicious 
rcu_dereference_check() usage!
[ 3197.218068]
[ 3197.218068] other info that might help us debug this:
[ 3197.218068]
[ 3197.218070]
[ 3197.218070] rcu_scheduler_active = 1, debug_locks = 1
[ 3197.218071] 1 lock held by modprobe/12024:
[ 3197.218072]  #0:  ((fs_info-buffer_lock)-rlock){+.+...}, at: 
[a025c5fa] btrfs_free_dummy_root+0x5a/0x1d0 [btrfs]
[ 3197.218093]
[ 3197.218093] stack backtrace:
[ 3197.218095] CPU: 3 PID: 12024 Comm: modprobe Not tainted 
3.15.0-rc8-fdm-btrfs-next-33+ #4
[ 3197.218096] Hardware name: Bochs Bochs, BIOS Bochs 01/01/2011
[ 3197.218097]  0001 8800af18fc18 81685c5a 
feb0
[ 3197.218099]  8800cf6ccb40 8800af18fc48 810a6316 
8801d955f640
[ 3197.218101]  8800d719e328 8800d719e370 8800d719c000 
8800af18fcb8
[ 3197.218102] Call Trace:
[ 3197.218105]  [81685c5a] dump_stack+0x4e/0x68
[ 3197.218108]  [810a6316] lockdep_rcu_suspicious+0xe6/0x130
[ 3197.218119]  [a025c728] btrfs_free_dummy_root+0x188/0x1d0 [btrfs]
[ 3197.218129]  [a025f56a] btrfs_test_qgroups+0xea/0x1bb [btrfs]
[ 3197.218137]  [a03a19d2] ? 
ftrace_define_fields_btrfs_space_reservation+0xfd/0xfd [btrfs]
[ 3197.218144]  [a03a19d2] ? 
ftrace_define_fields_btrfs_space_reservation+0xfd/0xfd [btrfs]
[ 3197.218151]  [a03a1ab7] init_btrfs_fs+0xe5/0x184 [btrfs]
[ 3197.218154]  [81000352] do_one_initcall+0x102/0x150
[ 3197.218157]  [8103d223] ? set_memory_nx+0x43/0x50
[ 3197.218160]  [81682668] ? set_section_ro_nx+0x6d/0x74
[ 3197.218162]  [810d91cc] load_module+0x1cdc/0x2630
[ 3197.218164]  [810d4e90] ? show_initstate+0x60/0x60
[ 3197.218166]  [810d9c9e] SyS_finit_module+0x8e/0x90
[ 3197.218168]  [81698212] system_call_fastpath+0x16/0x1b

Signed-off-by: Filipe David Borba Manana fdman...@gmail.com
---

V2: Added missing rcu read unlock if a retry is needed.

 fs/btrfs/tests/btrfs-tests.c | 12 +++-
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/fs/btrfs/tests/btrfs-tests.c b/fs/btrfs/tests/btrfs-tests.c
index a5dcacb..bdb1f05 100644
--- a/fs/btrfs/tests/btrfs-tests.c
+++ b/fs/btrfs/tests/btrfs-tests.c
@@ -130,8 +130,8 @@ static void btrfs_free_dummy_fs_info(struct btrfs_fs_info 
*fs_info)
struct radix_tree_iter iter;
void **slot;
 
-   spin_lock(fs_info-buffer_lock);
 restart:
+   rcu_read_lock();
radix_tree_for_each_slot(slot, fs_info-buffer_radix, iter, 0) {
struct extent_buffer *eb;
 
@@ -140,15 +140,17 @@ restart:
continue;
/* Shouldn't happen but that kind of thinking creates CVE's */
if (radix_tree_exception(eb)) {
-   if (radix_tree_deref_retry(eb))
+   if (radix_tree_deref_retry(eb)) {
+   rcu_read_unlock();
goto restart;
+   }
continue;
}
-   spin_unlock(fs_info-buffer_lock);
+   rcu_read_unlock();
free_extent_buffer_stale(eb);
-   spin_lock(fs_info-buffer_lock);
+   goto restart;
}
-   spin_unlock(fs_info-buffer_lock);
+   rcu_read_unlock();
 
btrfs_free_qgroup_config(fs_info);
btrfs_free_fs_roots(fs_info);
-- 
1.9.1

--
To unsubscribe from this list: send the line unsubscribe linux-btrfs in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH] Btrfs: ensure btrfs_prev_leaf doesn't miss 1 item

2014-06-09 Thread Filipe David Borba Manana
We might have had an item with the previous key in the tree right
before we released our path. And after we released our path, that
item might have been pushed to the first slot (0) of the leaf we
were holding due to a tree balance. Alternatively, an item with the
previous key can exist as the only element of a leaf (big fat item).
Therefore account for these 2 cases, so that our callers (like
btrfs_previous_item) don't miss an existing item with a key matching
the previous key we computed above.

Signed-off-by: Filipe David Borba Manana fdman...@gmail.com
---
 fs/btrfs/ctree.c | 12 +++-
 1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index d99d965..4eada52 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -5097,7 +5097,17 @@ int btrfs_prev_leaf(struct btrfs_root *root, struct 
btrfs_path *path)
return ret;
btrfs_item_key(path-nodes[0], found_key, 0);
ret = comp_keys(found_key, key);
-   if (ret  0)
+   /*
+* We might have had an item with the previous key in the tree right
+* before we released our path. And after we released our path, that
+* item might have been pushed to the first slot (0) of the leaf we
+* were holding due to a tree balance. Alternatively, an item with the
+* previous key can exist as the only element of a leaf (big fat item).
+* Therefore account for these 2 cases, so that our callers (like
+* btrfs_previous_item) don't miss an existing item with a key matching
+* the previous key we computed above.
+*/
+   if (ret = 0)
return 0;
return 1;
 }
-- 
1.9.1

--
To unsubscribe from this list: send the line unsubscribe linux-btrfs in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v4] Btrfs: make fsync work after cloning into a file

2014-06-08 Thread Filipe David Borba Manana
When cloning into a file, we were correctly replacing the extent
items in the target range and removing the extent maps. However
we weren't replacing the extent maps with new ones that point to
the new extents - as a consequence, an incremental fsync (when the
inode doesn't have the full sync flag) was a NOOP, since it relies
on the existence of extent maps in the modified list of the inode's
extent map tree, which was empty. Therefore add new extent maps to
reflect the target clone range.

A test case for xfstests follows.

Signed-off-by: Filipe David Borba Manana fdman...@gmail.com
---

V2: Factored out needed code from inode.c:btrfs_get_extent() into a
separate function so that it can be reused by the cloning code,
avoiding some duplicated and non-trivial logic when populating
an extent map from a file extent item.

V3: Removed unused function parameter, leftover from V1.

V4: Simplified some code in mapping from file extent item to extent map.

 fs/btrfs/ctree.h |  5 
 fs/btrfs/file-item.c | 66 
 fs/btrfs/inode.c | 41 +++-
 fs/btrfs/ioctl.c | 65 +++
 4 files changed, 139 insertions(+), 38 deletions(-)

diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index af523d6..a668fd9 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -3749,6 +3749,11 @@ int btrfs_csum_one_bio(struct btrfs_root *root, struct 
inode *inode,
   struct bio *bio, u64 file_start, int contig);
 int btrfs_lookup_csums_range(struct btrfs_root *root, u64 start, u64 end,
 struct list_head *list, int search_commit);
+void btrfs_extent_item_to_extent_map(struct inode *inode,
+const struct btrfs_path *path,
+struct btrfs_file_extent_item *fi,
+struct extent_map *em);
+
 /* inode.c */
 struct btrfs_delalloc_work {
struct inode *inode;
diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c
index 609d56b..9732b33 100644
--- a/fs/btrfs/file-item.c
+++ b/fs/btrfs/file-item.c
@@ -885,3 +885,69 @@ out:
 fail_unlock:
goto out;
 }
+
+void btrfs_extent_item_to_extent_map(struct inode *inode,
+const struct btrfs_path *path,
+struct btrfs_file_extent_item *fi,
+struct extent_map *em)
+{
+   struct btrfs_root *root = BTRFS_I(inode)-root;
+   struct extent_buffer *leaf = path-nodes[0];
+   const int slot = path-slots[0];
+   struct btrfs_key key;
+   u64 extent_start, extent_end;
+   u64 bytenr;
+   u8 type = btrfs_file_extent_type(leaf, fi);
+
+   em-bdev = root-fs_info-fs_devices-latest_bdev;
+   btrfs_item_key_to_cpu(leaf, key, slot);
+   extent_start = key.offset;
+
+   if (type == BTRFS_FILE_EXTENT_REG ||
+   type == BTRFS_FILE_EXTENT_PREALLOC) {
+   extent_end = extent_start +
+   btrfs_file_extent_num_bytes(leaf, fi);
+   } else if (type == BTRFS_FILE_EXTENT_INLINE) {
+   size_t size;
+   size = btrfs_file_extent_inline_len(leaf, slot, fi);
+   extent_end = ALIGN(extent_start + size, root-sectorsize);
+   }
+
+   em-ram_bytes = btrfs_file_extent_ram_bytes(leaf, fi);
+   em-start = extent_start;
+   em-orig_block_len = btrfs_file_extent_disk_num_bytes(leaf, fi);
+   em-compress_type = btrfs_file_extent_compression(leaf, fi);
+   if (em-compress_type != BTRFS_COMPRESS_NONE)
+   set_bit(EXTENT_FLAG_COMPRESSED, em-flags);
+
+   if (type == BTRFS_FILE_EXTENT_REG ||
+   type == BTRFS_FILE_EXTENT_PREALLOC) {
+   em-len = extent_end - extent_start;
+   em-orig_start = extent_start -
+   btrfs_file_extent_offset(leaf, fi);
+   bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
+   if (bytenr == 0) {
+   em-block_start = EXTENT_MAP_HOLE;
+   return;
+   }
+   if (em-compress_type != BTRFS_COMPRESS_NONE) {
+   em-block_len = em-orig_block_len;
+   } else {
+   bytenr += btrfs_file_extent_offset(leaf, fi);
+   em-block_len = em-len;
+   if (type == BTRFS_FILE_EXTENT_PREALLOC)
+   set_bit(EXTENT_FLAG_PREALLOC, em-flags);
+   }
+   em-block_start = bytenr;
+   } else if (type == BTRFS_FILE_EXTENT_INLINE) {
+   em-block_start = EXTENT_MAP_INLINE;
+   em-orig_start = em-start;
+   em-len = extent_end - extent_start;
+   em-block_len = em-orig_block_len;
+   } else {
+   btrfs_err(root-fs_info

[PATCH v5] Btrfs: make fsync work after cloning into a file

2014-06-08 Thread Filipe David Borba Manana
When cloning into a file, we were correctly replacing the extent
items in the target range and removing the extent maps. However
we weren't replacing the extent maps with new ones that point to
the new extents - as a consequence, an incremental fsync (when the
inode doesn't have the full sync flag) was a NOOP, since it relies
on the existence of extent maps in the modified list of the inode's
extent map tree, which was empty. Therefore add new extent maps to
reflect the target clone range.

A test case for xfstests follows.

Signed-off-by: Filipe David Borba Manana fdman...@gmail.com
---

V2: Factored out needed code from inode.c:btrfs_get_extent() into a
separate function so that it can be reused by the cloning code,
avoiding some duplicated and non-trivial logic when populating
an extent map from a file extent item.

V3: Removed unused function parameter, leftover from V1.

V4: Simplified some code in mapping from file extent item to extent map.

V5: Corrected refactoring to have the same exact behaviour as before (in
btrfs_get_extent) for new inline extents. Fixed an issue introduced
in V4 that made xfstests/generic/269 trigger an error and the following
warning in dmesg:

[13229.752008] WARNING: CPU: 1 PID: 13326 at fs/btrfs/extent_io.c:5097 
map_private_extent_buffer+0xd4/0xe0 [btrfs]()
[13229.752383] btrfs bad mapping eb start 78897152 len 4096, wanted 4098 8

Updated the corresponding test case for xfstests to test for inline
file extents.


 fs/btrfs/ctree.h |  6 +
 fs/btrfs/file-item.c | 76 
 fs/btrfs/inode.c | 42 +++--
 fs/btrfs/ioctl.c | 69 +++
 4 files changed, 155 insertions(+), 38 deletions(-)

diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index af523d6..b7e2c1c 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -3749,6 +3749,12 @@ int btrfs_csum_one_bio(struct btrfs_root *root, struct 
inode *inode,
   struct bio *bio, u64 file_start, int contig);
 int btrfs_lookup_csums_range(struct btrfs_root *root, u64 start, u64 end,
 struct list_head *list, int search_commit);
+void btrfs_extent_item_to_extent_map(struct inode *inode,
+const struct btrfs_path *path,
+struct btrfs_file_extent_item *fi,
+const bool new_inline,
+struct extent_map *em);
+
 /* inode.c */
 struct btrfs_delalloc_work {
struct inode *inode;
diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c
index 609d56b..f46cfe4 100644
--- a/fs/btrfs/file-item.c
+++ b/fs/btrfs/file-item.c
@@ -885,3 +885,79 @@ out:
 fail_unlock:
goto out;
 }
+
+void btrfs_extent_item_to_extent_map(struct inode *inode,
+const struct btrfs_path *path,
+struct btrfs_file_extent_item *fi,
+const bool new_inline,
+struct extent_map *em)
+{
+   struct btrfs_root *root = BTRFS_I(inode)-root;
+   struct extent_buffer *leaf = path-nodes[0];
+   const int slot = path-slots[0];
+   struct btrfs_key key;
+   u64 extent_start, extent_end;
+   u64 bytenr;
+   u8 type = btrfs_file_extent_type(leaf, fi);
+   int compress_type = btrfs_file_extent_compression(leaf, fi);
+
+   em-bdev = root-fs_info-fs_devices-latest_bdev;
+   btrfs_item_key_to_cpu(leaf, key, slot);
+   extent_start = key.offset;
+
+   if (type == BTRFS_FILE_EXTENT_REG ||
+   type == BTRFS_FILE_EXTENT_PREALLOC) {
+   extent_end = extent_start +
+   btrfs_file_extent_num_bytes(leaf, fi);
+   } else if (type == BTRFS_FILE_EXTENT_INLINE) {
+   size_t size;
+   size = btrfs_file_extent_inline_len(leaf, slot, fi);
+   extent_end = ALIGN(extent_start + size, root-sectorsize);
+   }
+
+   em-ram_bytes = btrfs_file_extent_ram_bytes(leaf, fi);
+   if (type == BTRFS_FILE_EXTENT_REG ||
+   type == BTRFS_FILE_EXTENT_PREALLOC) {
+   em-start = extent_start;
+   em-len = extent_end - extent_start;
+   em-orig_start = extent_start -
+   btrfs_file_extent_offset(leaf, fi);
+   em-orig_block_len = btrfs_file_extent_disk_num_bytes(leaf, fi);
+   bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
+   if (bytenr == 0) {
+   em-block_start = EXTENT_MAP_HOLE;
+   return;
+   }
+   if (compress_type != BTRFS_COMPRESS_NONE) {
+   set_bit(EXTENT_FLAG_COMPRESSED, em-flags);
+   em-compress_type = compress_type;
+   em-block_start = bytenr

[PATCH v2] xfstests: add test for btrfs clone + fsync durability

2014-06-08 Thread Filipe David Borba Manana
Regression test for btrfs ioctl clone operation + fsync + log
recovery. The issue was that doing an fsync after cloning into
a file didn't give any persistence guarantees as it should.
What happened was that the in memory metadata (extent maps)
weren't updated, which made the fsync code not able to detect
that file data has been changed and must be persisted to the
log.

This issue is fixed by the following linux kernel btrfs patch:

Btrfs: make fsync work after cloning into a file

Signed-off-by: Filipe David Borba Manana fdman...@gmail.com
---

V2: Test small files too, consisting of a single inline extent, as
it triggers different code paths.

 tests/btrfs/056 | 150 
 tests/btrfs/056.out | 129 
 tests/btrfs/group   |   1 +
 3 files changed, 280 insertions(+)
 create mode 100755 tests/btrfs/056
 create mode 100644 tests/btrfs/056.out

diff --git a/tests/btrfs/056 b/tests/btrfs/056
new file mode 100755
index 000..e066442
--- /dev/null
+++ b/tests/btrfs/056
@@ -0,0 +1,150 @@
+#! /bin/bash
+# FS QA Test No. btrfs/056
+#
+# Regression test for btrfs ioctl clone operation + fsync + log recovery.
+# The issue was that doing an fsync after cloning into a file didn't give any
+# persistence guarantees as it should. What happened was that the in memory
+# metadata (extent maps) weren't updated, which made the fsync code not able
+# to detect that file data has been changed.
+#
+# This issue is fixed by the following linux kernel btrfs patch:
+#
+#Btrfs: make fsync work after cloning into a file
+#
+#---
+# Copyright (c) 2014 Filipe Manana.  All Rights Reserved.
+#
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License as
+# published by the Free Software Foundation.
+#
+# This program is distributed in the hope that it would be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write the Free Software Foundation,
+# Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+#---
+#
+
+seq=`basename $0`
+seqres=$RESULT_DIR/$seq
+echo QA output created by $seq
+
+tmp=/tmp/$$
+status=1   # failure is the default!
+trap _cleanup; exit \$status 0 1 2 3 15
+
+_cleanup()
+{
+   _cleanup_flakey
+   rm -fr $tmp
+}
+
+# get standard environment, filters and checks
+. ./common/rc
+. ./common/filter
+. ./common/dmflakey
+
+# real QA test starts here
+_supported_fs btrfs
+_supported_os Linux
+_require_scratch
+_require_btrfs_cloner
+_require_btrfs_fs_feature no_holes
+_require_btrfs_mkfs_feature no-holes
+_require_dm_flakey
+_need_to_be_root
+
+rm -f $seqres.full
+
+test_btrfs_clone_fsync_log_recover()
+{
+   _scratch_mkfs $1 /dev/null 21
+   _init_flakey
+   SAVE_MOUNT_OPTIONS=$MOUNT_OPTIONS
+   MOUNT_OPTIONS=$MOUNT_OPTIONS $2
+   _mount_flakey
+
+   # Create a file with 4 extents and 1 hole, all with a size of 8Kb each.
+   # The hole is in the range [16384, 24576[.
+   $XFS_IO_PROG -f -c pwrite -S 0x01 -b 8192 0 8192 \
+   -c fsync \
+   -c pwrite -S 0x02 -b 8192 8192 8192 \
+   -c fsync \
+   -c pwrite -S 0x04 -b 8192 24576 8192 \
+   -c fsync \
+   -c pwrite -S 0x05 -b 8192 32768 8192 \
+   -c fsync \
+   $SCRATCH_MNT/foo | _filter_xfs_io
+
+   # Clone destination file, 1 extent of 96kb.
+   $XFS_IO_PROG -f -c pwrite -S 0xff -b 98304 0 98304 -c fsync \
+   $SCRATCH_MNT/bar | _filter_xfs_io
+
+   # Clone second half of the 2nd extent, the 8kb hole, the 3rd extent
+   # and the first half of the 4th extent into file bar.
+   $CLONER_PROG -s 12288 -d 0 -l 24576 $SCRATCH_MNT/foo $SCRATCH_MNT/bar
+
+   $XFS_IO_PROG -c fsync $SCRATCH_MNT/bar
+
+   # Test small files too consisting of 1 inline extent
+   $XFS_IO_PROG -f -c pwrite -S 0x00 -b 3500 0 3500 -c fsync \
+   $SCRATCH_MNT/foo2 | _filter_xfs_io
+
+   $XFS_IO_PROG -f -c pwrite -S 0xcc -b 1000 0 1000 -c fsync \
+   $SCRATCH_MNT/bar2 | _filter_xfs_io
+
+   # Clone the entire foo2 file into bar2, overwriting all data in bar2
+   # and increasing its size.
+   $CLONER_PROG -s 0 -d 0 -l 3500 $SCRATCH_MNT/foo2 $SCRATCH_MNT/bar2
+   $XFS_IO_PROG -c fsync $SCRATCH_MNT/bar2
+
+   _load_flakey_table $FLAKEY_DROP_WRITES
+   _unmount_flakey
+
+   # Verify that there are no consistency errors.
+   _check_scratch_fs

[PATCH v3] Btrfs: make fsync work after cloning into a file

2014-06-06 Thread Filipe David Borba Manana
When cloning into a file, we were correctly replacing the extent
items in the target range and removing the extent maps. However
we weren't replacing the extent maps with new ones that point to
the new extents - as a consequence, an incremental fsync (when the
inode doesn't have the full sync flag) was a NOOP, since it relies
on the existence of extent maps in the modified list of the inode's
extent map tree, which was empty. Therefore add new extent maps to
reflect the target clone range.

A test case for xfstests follows.

Signed-off-by: Filipe David Borba Manana fdman...@gmail.com
---

V2: Factored out needed code from inode.c:btrfs_get_extent() into a
separate function so that it can be reused by the cloning code,
avoiding some duplicated and non-trivial logic when populating
an extent map from a file extent item.

V3: Removed unused function parameter, leftover from V1.

 fs/btrfs/ctree.h |  5 
 fs/btrfs/file-item.c | 68 
 fs/btrfs/inode.c | 41 +++
 fs/btrfs/ioctl.c | 65 +
 4 files changed, 141 insertions(+), 38 deletions(-)

diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index af523d6..a668fd9 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -3749,6 +3749,11 @@ int btrfs_csum_one_bio(struct btrfs_root *root, struct 
inode *inode,
   struct bio *bio, u64 file_start, int contig);
 int btrfs_lookup_csums_range(struct btrfs_root *root, u64 start, u64 end,
 struct list_head *list, int search_commit);
+void btrfs_extent_item_to_extent_map(struct inode *inode,
+const struct btrfs_path *path,
+struct btrfs_file_extent_item *fi,
+struct extent_map *em);
+
 /* inode.c */
 struct btrfs_delalloc_work {
struct inode *inode;
diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c
index 609d56b..f8f2436 100644
--- a/fs/btrfs/file-item.c
+++ b/fs/btrfs/file-item.c
@@ -885,3 +885,71 @@ out:
 fail_unlock:
goto out;
 }
+
+void btrfs_extent_item_to_extent_map(struct inode *inode,
+const struct btrfs_path *path,
+struct btrfs_file_extent_item *fi,
+struct extent_map *em)
+{
+   struct btrfs_root *root = BTRFS_I(inode)-root;
+   struct extent_buffer *leaf = path-nodes[0];
+   const int slot = path-slots[0];
+   struct btrfs_key key;
+   u64 extent_start, extent_end;
+   u64 bytenr;
+   u8 type = btrfs_file_extent_type(leaf, fi);
+
+   em-bdev = root-fs_info-fs_devices-latest_bdev;
+   btrfs_item_key_to_cpu(leaf, key, slot);
+   extent_start = key.offset;
+
+   if (type == BTRFS_FILE_EXTENT_REG ||
+   type == BTRFS_FILE_EXTENT_PREALLOC) {
+   extent_end = extent_start +
+   btrfs_file_extent_num_bytes(leaf, fi);
+   } else if (type == BTRFS_FILE_EXTENT_INLINE) {
+   size_t size;
+   size = btrfs_file_extent_inline_len(leaf, slot, fi);
+   extent_end = ALIGN(extent_start + size, root-sectorsize);
+   }
+
+   em-ram_bytes = btrfs_file_extent_ram_bytes(leaf, fi);
+   em-compress_type = btrfs_file_extent_compression(leaf, fi);
+   if (em-compress_type != BTRFS_COMPRESS_NONE)
+   set_bit(EXTENT_FLAG_COMPRESSED, em-flags);
+
+   if (type == BTRFS_FILE_EXTENT_REG ||
+   type == BTRFS_FILE_EXTENT_PREALLOC) {
+   em-start = extent_start;
+   em-len = extent_end - extent_start;
+   em-orig_start = extent_start -
+   btrfs_file_extent_offset(leaf, fi);
+   em-orig_block_len = btrfs_file_extent_disk_num_bytes(leaf, fi);
+   bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
+   if (bytenr == 0) {
+   em-block_start = EXTENT_MAP_HOLE;
+   return;
+   }
+   if (em-compress_type != BTRFS_COMPRESS_NONE) {
+   em-block_start = bytenr;
+   em-block_len = em-orig_block_len;
+   } else {
+   bytenr += btrfs_file_extent_offset(leaf, fi);
+   em-block_start = bytenr;
+   em-block_len = em-len;
+   if (type == BTRFS_FILE_EXTENT_PREALLOC)
+   set_bit(EXTENT_FLAG_PREALLOC, em-flags);
+   }
+   } else if (type == BTRFS_FILE_EXTENT_INLINE) {
+   em-block_start = EXTENT_MAP_INLINE;
+   em-start = extent_start;
+   em-orig_start = EXTENT_MAP_HOLE;
+   em-len = extent_end - extent_start;
+   em-block_len = (u64)-1;
+   } else {
+   btrfs_err(root-fs_info

[PATCH 3/3] Btrfs: don't release invalid page in btrfs_page_exists_in_range()

2014-06-05 Thread Filipe David Borba Manana
In inode.c:btrfs_page_exists_in_range(), if the page we got from
the radix tree is an exception entry, which can't be retried, we
exit the loop with a non-NULL page and then call page_cache_release
against it, which is not ok since it's not a valid page. This could
also make us return true when we shouldn't.

Signed-off-by: Filipe David Borba Manana fdman...@gmail.com
---
 fs/btrfs/inode.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index f265f41..477e64a 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -6776,6 +6776,7 @@ bool btrfs_page_exists_in_range(struct inode *inode, 
loff_t start, loff_t end)
 * here as an exceptional entry: so return it without
 * attempting to raise page count.
 */
+   page = NULL;
break; /* TODO: Is this relevant for this use case? */
}
 
-- 
1.9.1

--
To unsubscribe from this list: send the line unsubscribe linux-btrfs in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 1/3] Btrfs: make sure we retry if we couldn't get the page

2014-06-05 Thread Filipe David Borba Manana
In inode.c:btrfs_page_exists_in_range(), if we can't get the page
we need to retry. However we weren't retrying because we weren't
setting page to NULL, which makes the while loop exit immediately
and will make us call page_cache_release after exiting the loop
which is incorrect because our page get didn't succeed. This could
also make us return true when we shouldn't.

Signed-off-by: Filipe David Borba Manana fdman...@gmail.com
---
 fs/btrfs/inode.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 38d1e7b..cdbd20e 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -6777,8 +6777,10 @@ bool btrfs_page_exists_in_range(struct inode *inode, 
loff_t start, loff_t end)
break; /* TODO: Is this relevant for this use case? */
}
 
-   if (!page_cache_get_speculative(page))
+   if (!page_cache_get_speculative(page)) {
+   page = NULL;
continue;
+   }
 
/*
 * Has the page moved?
-- 
1.9.1

--
To unsubscribe from this list: send the line unsubscribe linux-btrfs in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 2/3] Btrfs: make sure we retry if page is a retriable exception

2014-06-05 Thread Filipe David Borba Manana
In inode.c:btrfs_page_exists_in_range(), if the page we get from the
radix tree is an exception which should make us retry, set page to
NULL in order to really retry, because otherwise we don't get another
loop iteration executed (page != NULL makes the while loop exit).
This was also making us call page_cache_release after exiting the loop,
which isn't correct because page doesn't point to a valid page, and
possibly return true from the function when we shouldn't.

Signed-off-by: Filipe David Borba Manana fdman...@gmail.com
---
 fs/btrfs/inode.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index cdbd20e..f265f41 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -6767,8 +6767,10 @@ bool btrfs_page_exists_in_range(struct inode *inode, 
loff_t start, loff_t end)
break;
 
if (radix_tree_exception(page)) {
-   if (radix_tree_deref_retry(page))
+   if (radix_tree_deref_retry(page)) {
+   page = NULL;
continue;
+   }
/*
 * Otherwise, shmem/tmpfs must be storing a swap entry
 * here as an exceptional entry: so return it without
-- 
1.9.1

--
To unsubscribe from this list: send the line unsubscribe linux-btrfs in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH] Btrfs: make fsync work after cloning into a file

2014-06-05 Thread Filipe David Borba Manana
When cloning into a file, we were correctly replacing the extent
items in the target range and removing the extent maps. However
we weren't replacing the extent maps with new ones that point to
the new extents - as a consequence, an incremental fsync (when the
inode doesn't have the full sync flag) was a NOOP, since it relies
on the existence of extent maps in the modified list of the inode's
extent map tree, which was empty. Therefore add new extent maps to
reflect the target clone range.

A test case for xfstests follows.

Signed-off-by: Filipe David Borba Manana fdman...@gmail.com
---
 fs/btrfs/ioctl.c | 114 +++
 1 file changed, 114 insertions(+)

diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 44dcfd0..1197478 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -3043,6 +3043,104 @@ out:
return ret;
 }
 
+static void clone_update_extent_map(struct inode *inode,
+   const struct btrfs_trans_handle *trans,
+   const struct btrfs_path *path,
+   const struct btrfs_key *key,
+   struct btrfs_file_extent_item *fi,
+   const u64 hole_offset,
+   const u64 hole_len)
+{
+   struct extent_map_tree *em_tree = BTRFS_I(inode)-extent_tree;
+   struct btrfs_root *root = BTRFS_I(inode)-root;
+   struct extent_buffer *leaf = path-nodes[0];
+   const int slot = path-slots[0];
+   struct extent_map *em;
+   u64 extent_start, extent_end;
+   u64 bytenr;
+   u8 type;
+   int ret;
+
+   em = alloc_extent_map();
+   if (!em) {
+   set_bit(BTRFS_INODE_NEEDS_FULL_SYNC,
+   BTRFS_I(inode)-runtime_flags);
+   return;
+   }
+
+   em-bdev = root-fs_info-fs_devices-latest_bdev;
+   if (!fi) {
+   em-start = hole_offset;
+   em-len = hole_len;
+   em-ram_bytes = em-len;
+   em-orig_start = hole_offset;
+   em-block_start = EXTENT_MAP_HOLE;
+   em-block_len = 0;
+   em-orig_block_len = 0;
+   em-compress_type = BTRFS_COMPRESS_NONE;
+   em-generation = trans-transid;
+   goto insert_em;
+   }
+
+   em-generation = -1;
+   extent_start = key-offset;
+   extent_end = extent_start + btrfs_file_extent_num_bytes(leaf, fi);
+   bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
+   type = btrfs_file_extent_type(leaf, fi);
+
+   em-start = extent_start;
+   em-ram_bytes = btrfs_file_extent_ram_bytes(leaf, fi);
+   em-compress_type = btrfs_file_extent_compression(leaf, fi);
+
+   if (em-compress_type != BTRFS_COMPRESS_NONE)
+   set_bit(EXTENT_FLAG_COMPRESSED, em-flags);
+
+   if (type == BTRFS_FILE_EXTENT_INLINE) {
+   em-len = ALIGN(btrfs_file_extent_inline_len(leaf, slot, fi),
+   root-sectorsize);
+   em-orig_block_len = em-len;
+   em-orig_start = em-start;
+   em-block_start = EXTENT_MAP_INLINE;
+   em-block_len = (u64)-1;
+   goto insert_em;
+   }
+
+   em-len = extent_end - extent_start;
+   em-orig_block_len = btrfs_file_extent_disk_num_bytes(leaf, fi);
+   em-orig_start = extent_start - btrfs_file_extent_offset(leaf, fi);
+   if (bytenr == 0)
+   em-block_start = EXTENT_MAP_HOLE;
+   else
+   em-block_start = bytenr;
+
+   if (em-compress_type == BTRFS_COMPRESS_NONE) {
+   em-block_start += btrfs_file_extent_offset(leaf, fi);
+   em-block_len = em-len;
+   } else {
+   em-block_len = em-orig_block_len;
+   }
+
+   if (type == BTRFS_FILE_EXTENT_PREALLOC)
+   set_bit(EXTENT_FLAG_PREALLOC, em-flags);
+
+insert_em:
+   while (1) {
+   write_lock(em_tree-lock);
+   ret = add_extent_mapping(em_tree, em, 1);
+   write_unlock(em_tree-lock);
+   if (ret != -EEXIST) {
+   free_extent_map(em);
+   break;
+   }
+   btrfs_drop_extent_cache(inode, em-start,
+   em-start + em-len - 1, 0);
+   }
+
+   if (unlikely(ret))
+   set_bit(BTRFS_INODE_NEEDS_FULL_SYNC,
+   BTRFS_I(inode)-runtime_flags);
+}
+
 /**
  * btrfs_clone() - clone a range from inode file to another
  *
@@ -3361,8 +3459,19 @@ process_slot:
btrfs_item_ptr_offset(leaf, slot),
size);
inode_add_bytes(inode, datal);
+   extent = btrfs_item_ptr(leaf, slot,
+   struct

[PATCH] xfstests: add test for btrfs clone + fsync durability

2014-06-05 Thread Filipe David Borba Manana
Regression test for btrfs ioctl clone operation + fsync + log
recovery. The issue was that doing an fsync after cloning into
a file didn't give any persistence guarantees as it should.
What happened was that the in memory metadata (extent maps)
weren't updated, which made the fsync code not able to detect
that file data has been changed and must be persisted to the
log.

This issue is fixed by the following linux kernel btrfs patch:

Btrfs: make fsync work after cloning into a file

Signed-off-by: Filipe David Borba Manana fdman...@gmail.com
---
 tests/btrfs/056 | 134 
 tests/btrfs/056.out |  89 ++
 tests/btrfs/group   |   1 +
 3 files changed, 224 insertions(+)
 create mode 100755 tests/btrfs/056
 create mode 100644 tests/btrfs/056.out

diff --git a/tests/btrfs/056 b/tests/btrfs/056
new file mode 100755
index 000..cfe87cd
--- /dev/null
+++ b/tests/btrfs/056
@@ -0,0 +1,134 @@
+#! /bin/bash
+# FS QA Test No. btrfs/056
+#
+# Regression test for btrfs ioctl clone operation + fsync + log recovery.
+# The issue was that doing an fsync after cloning into a file didn't give any
+# persistence guarantees as it should. What happened was that the in-memory
+# metadata (extent maps) weren't updated, which made the fsync code unable
+# to detect that file data had been changed.
+#
+# This issue is fixed by the following linux kernel btrfs patch:
+#
+#Btrfs: make fsync work after cloning into a file
+#
+#---
+# Copyright (c) 2014 Filipe Manana.  All Rights Reserved.
+#
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License as
+# published by the Free Software Foundation.
+#
+# This program is distributed in the hope that it would be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write the Free Software Foundation,
+# Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+#---
+#
+
+seq=`basename $0`
+seqres=$RESULT_DIR/$seq
+echo QA output created by $seq
+
+tmp=/tmp/$$
+status=1   # failure is the default!
+trap _cleanup; exit \$status 0 1 2 3 15
+
+_cleanup()
+{
+   _cleanup_flakey
+   rm -fr $tmp
+}
+
+# get standard environment, filters and checks
+. ./common/rc
+. ./common/filter
+. ./common/dmflakey
+
+# real QA test starts here
+_supported_fs btrfs
+_supported_os Linux
+_require_scratch
+_require_btrfs_cloner
+_require_btrfs_fs_feature no_holes
+_require_btrfs_mkfs_feature no-holes
+_require_dm_flakey
+_need_to_be_root
+
+rm -f $seqres.full
+
+test_btrfs_clone_fsync_log_recover()
+{
+   _scratch_mkfs $1 /dev/null 21
+   _init_flakey
+   SAVE_MOUNT_OPTIONS=$MOUNT_OPTIONS
+   MOUNT_OPTIONS=$MOUNT_OPTIONS $2
+   _mount_flakey
+
+   # Create a file with 4 extents and 1 hole, all with a size of 8Kb each.
+   # The hole is in the range [16384, 24576[.
+   $XFS_IO_PROG -f -c pwrite -S 0x01 -b 8192 0 8192 \
+   -c fsync \
+   -c pwrite -S 0x02 -b 8192 8192 8192 \
+   -c fsync \
+   -c pwrite -S 0x04 -b 8192 24576 8192 \
+   -c fsync \
+   -c pwrite -S 0x05 -b 8192 32768 8192 \
+   -c fsync \
+   $SCRATCH_MNT/foo | _filter_xfs_io
+
+   # Clone destination file, 1 extent of 96kb.
+   $XFS_IO_PROG -f -c pwrite -S 0xff -b 98304 0 98304 -c fsync \
+   $SCRATCH_MNT/bar | _filter_xfs_io
+
+   # Clone second half of the 2nd extent, the 8kb hole, the 3rd extent
+   # and the first half of the 4th extent into file bar.
+   $CLONER_PROG -s 12288 -d 0 -l 24576 $SCRATCH_MNT/foo $SCRATCH_MNT/bar
+
+   $XFS_IO_PROG -c fsync $SCRATCH_MNT/bar
+
+   _load_flakey_table $FLAKEY_DROP_WRITES
+   _unmount_flakey
+
+   # Verify that there are no consistency errors.
+   _check_scratch_fs $FLAKEY_DEV
+
+   _load_flakey_table $FLAKEY_ALLOW_WRITES
+   _mount_flakey
+
+   # Verify the cloned range was persisted by fsync and the log recovery
+   # code did its work well.
+   od -t x1 $SCRATCH_MNT/bar
+
+   _unmount_flakey
+
+   # Verify that there are no consistency errors.
+   _check_scratch_fs $FLAKEY_DEV
+
+   _cleanup_flakey
+   MOUNT_OPTIONS=$SAVE_MOUNT_OPTIONS
+}
+
+# Regardless of the NO_HOLES feature being enabled or not, the test results
+# should be exactly the same for both cases.
+
+echo Testing without the NO_HOLES feature
+# As of btrfs-progs 3.14.x, the no-holes feature isn't enabled

[PATCH] Btrfs: update commit root on snapshot creation after orphan cleanup

2014-06-03 Thread Filipe David Borba Manana
On snapshot creation (either writable or read-only), we do orphan cleanup
against the root of the snapshot. If the cleanup did remove any orphans,
then the current root node will be different from the commit root node
until the next transaction commit happens.

A send operation always uses the commit root of a snapshot - this means
it will see the orphans if it starts computing the send stream before the
next transaction commit happens (triggered by a timer or sync(), for example),
which is when the commit root gets assigned a reference to current root,
where the orphans are not visible anymore. The consequence of send seeing
the orphans is explained below.

For example:

mkfs.btrfs -f /dev/sdd
mount -o commit=999 /dev/sdd /mnt

# open a file with O_TMPFILE and leave it open
# write some data to the file
btrfs subvolume snapshot -r /mnt /mnt/snap1

btrfs send /mnt/snap1 -f /tmp/send.data

The send operation will fail with the following error:

ERROR: send ioctl failed with -116: Stale file handle

What happens here is that our snapshot has an orphan inode still visible
through the commit root, that corresponds to the tmpfile. However send
will attempt to call inode.c:btrfs_iget(), with the goal of reading the
file's data, which will return -ESTALE because it will use the current
root (and not the commit root) of the snapshot.

Of course, there are other cases where we can get orphans, but this
example using a tmpfile makes it much easier to reproduce the issue.

Therefore on snapshot creation, after calling btrfs_orphan_cleanup, if
the commit root is different from the current root, just commit the
transaction associated with the snapshot's root (if it exists), so that
a send will not see any orphans that don't exist anymore. This also
guarantees a send will always see the same content regardless of whether
a transaction commit happened already before the send was requested and
after the orphan cleanup (meaning the commit root and current roots are
the same) or it hasn't happened yet (commit and current roots are
different).

Signed-off-by: Filipe David Borba Manana fdman...@gmail.com
---
 fs/btrfs/ioctl.c | 29 +
 1 file changed, 29 insertions(+)

diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 95194a9..6680ad9 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -712,6 +712,35 @@ static int create_snapshot(struct btrfs_root *root, struct 
inode *dir,
if (ret)
goto fail;
 
+   /*
+* If orphan cleanup did remove any orphans, it means the tree was
+* modified and therefore the commit root is not the same as the
+* current root anymore. This is a problem, because send uses the
+* commit root and therefore can see inode items that don't exist
+* in the current root anymore, and for example make calls to
+* btrfs_iget, which will do tree lookups based on the current root
+* and not on the commit root. Those lookups will fail, returning a
+* -ESTALE error, and making send fail with that error. So make sure
+* a send does not see any orphans we have just removed, and that it
+* will see the same inodes regardless of whether a transaction
+* commit happened before it started (meaning that the commit root
+* will be the same as the current root) or not.
+*/
+   if (readonly  pending_snapshot-snap-node !=
+   pending_snapshot-snap-commit_root) {
+   trans = btrfs_join_transaction(pending_snapshot-snap);
+   if (IS_ERR(trans)  PTR_ERR(trans) != -ENOENT) {
+   ret = PTR_ERR(trans);
+   goto fail;
+   }
+   if (!IS_ERR(trans)) {
+   ret = btrfs_commit_transaction(trans,
+  pending_snapshot-snap);
+   if (ret)
+   goto fail;
+   }
+   }
+
inode = btrfs_lookup_dentry(dentry-d_parent-d_inode, dentry);
if (IS_ERR(inode)) {
ret = PTR_ERR(inode);
-- 
1.9.1

--
To unsubscribe from this list: send the line unsubscribe linux-btrfs in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v5] xfstests: add test for btrfs cloning with file holes

2014-06-03 Thread Filipe David Borba Manana
Regression test for the btrfs ioctl clone operation when the source range
contains hole(s) and the FS has the NO_HOLES feature enabled (file holes
don't need file extent items in the btree to represent them).

This issue is fixed by the following linux kernel btrfs patch:

Btrfs: fix clone to deal with holes when NO_HOLES feature is enabled

Signed-off-by: Filipe David Borba Manana fdman...@gmail.com
---

V2: Increased test coverage by testing the cases where a hole overlaps
the start and end of the cloning range.

V3: Test the case where the cloning range includes a hole at the end
of the source file and might increase the size of the target file.

V4: Added test for the case where the clone range covers only a hole at
the beginning of the source file.
Made the test be skipped if the available version of mkfs.btrfs
doesn't support the no-holes feature. And when testing the case
where the no-holes feature isn't enabled, explicitly ask mkfs.btrfs
to disable no-holes (future versions of mkfs.btrfs might enable
this feature by default).

V5: Detect if kernel supports NO_HOLES feature too. Added some messages
(echoes) before each od call to make it easier to match output
with each specific test.

 common/rc   |  25 
 tests/btrfs/055 | 173 ++
 tests/btrfs/055.out | 347 
 tests/btrfs/group   |   1 +
 4 files changed, 546 insertions(+)
 create mode 100755 tests/btrfs/055
 create mode 100644 tests/btrfs/055.out

diff --git a/common/rc b/common/rc
index f27ee53..e2136d0 100644
--- a/common/rc
+++ b/common/rc
@@ -2177,6 +2177,31 @@ _require_btrfs_send_stream_version()
fi
 }
 
+_require_btrfs_mkfs_feature()
+{
+   if [ -z $1 ]; then
+   echo Missing feature name argument for 
_require_btrfs_mkfs_feature
+   exit 1
+   fi
+   feat=$1
+   $MKFS_BTRFS_PROG -O list-all 21 | \
+   grep '^[ \t]*'$feat'\b'  /dev/null 21
+   [ $? -eq 0 ] || \
+   _notrun Feature $feat not supported in the available version 
of mkfs.btrfs
+}
+
+_require_btrfs_fs_feature()
+{
+   if [ -z $1 ]; then
+   echo Missing feature name argument for 
_require_btrfs_fs_feature
+   exit 1
+   fi
+   feat=$1
+   modprobe btrfs  /dev/null 21
+   [ -e /sys/fs/btrfs/features/$feat ] || \
+   _notrun Feature $feat not supported by the available btrfs 
version
+}
+
 init_rc()
 {
if [ $iam == new ]
diff --git a/tests/btrfs/055 b/tests/btrfs/055
new file mode 100755
index 000..be38d09
--- /dev/null
+++ b/tests/btrfs/055
@@ -0,0 +1,173 @@
+#! /bin/bash
+# FS QA Test No. btrfs/055
+#
+# Regression test for the btrfs ioctl clone operation when the source range
+# contains hole(s) and the FS has the NO_HOLES feature enabled (file holes
+# don't need file extent items in the btree to represent them).
+#
+# This issue is fixed by the following linux kernel btrfs patch:
+#
+#Btrfs: fix clone to deal with holes when NO_HOLES feature is enabled
+#
+#---
+# Copyright (c) 2014 Filipe Manana.  All Rights Reserved.
+#
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License as
+# published by the Free Software Foundation.
+#
+# This program is distributed in the hope that it would be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write the Free Software Foundation,
+# Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+#---
+#
+
+seq=`basename $0`
+seqres=$RESULT_DIR/$seq
+echo QA output created by $seq
+
+tmp=/tmp/$$
+status=1   # failure is the default!
+trap _cleanup; exit \$status 0 1 2 3 15
+
+_cleanup()
+{
+rm -fr $tmp
+}
+
+# get standard environment, filters and checks
+. ./common/rc
+. ./common/filter
+
+# real QA test starts here
+_supported_fs btrfs
+_supported_os Linux
+_require_scratch
+_require_btrfs_cloner
+_require_btrfs_fs_feature no_holes
+_require_btrfs_mkfs_feature no-holes
+_need_to_be_root
+
+rm -f $seqres.full
+
+test_btrfs_clone_with_holes()
+{
+   _scratch_mkfs $1 /dev/null 21
+   _scratch_mount
+
+   # Create a file with 4 extents and 1 hole, all with a size of 8Kb each.
+   $XFS_IO_PROG -f -c pwrite -S 0x01 -b 8192 0 8192 $SCRATCH_MNT/foo \
+   | _filter_xfs_io
+   sync
+   $XFS_IO_PROG -c pwrite -S 0x02 -b 8192 8192 8192 $SCRATCH_MNT/foo \
+   | _filter_xfs_io
+   sync
+   # After the following write we get an hole in the range [16384, 24576

[PATCH v2] xfstests: add test for btrfs cloning with file holes

2014-05-31 Thread Filipe David Borba Manana
Regression test for the btrfs ioctl clone operation when the source range
contains hole(s) and the FS has the NO_HOLES feature enabled (file holes
don't need file extent items in the btree to represent them).

This issue is fixed by the following linux kernel btrfs patch:

Btrfs: fix clone to deal with holes when NO_HOLES feature is enabled

Signed-off-by: Filipe David Borba Manana fdman...@gmail.com
---

V2: Increased test coverage by testing the cases where a hole overlaps
the start and end of the cloning range.

 tests/btrfs/055 | 112 +
 tests/btrfs/055.out | 117 
 tests/btrfs/group   |   1 +
 3 files changed, 230 insertions(+)
 create mode 100755 tests/btrfs/055
 create mode 100644 tests/btrfs/055.out

diff --git a/tests/btrfs/055 b/tests/btrfs/055
new file mode 100755
index 000..4a1614b
--- /dev/null
+++ b/tests/btrfs/055
@@ -0,0 +1,112 @@
+#! /bin/bash
+# FS QA Test No. btrfs/055
+#
+# Regression test for the btrfs ioctl clone operation when the source range
+# contains hole(s) and the FS has the NO_HOLES feature enabled (file holes
+# don't need file extent items in the btree to represent them).
+#
+# This issue is fixed by the following linux kernel btrfs patch:
+#
+#Btrfs: fix clone to deal with holes when NO_HOLES feature is enabled
+#
+#---
+# Copyright (c) 2014 Filipe Manana.  All Rights Reserved.
+#
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License as
+# published by the Free Software Foundation.
+#
+# This program is distributed in the hope that it would be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write the Free Software Foundation,
+# Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+#---
+#
+
+seq=`basename $0`
+seqres=$RESULT_DIR/$seq
+echo QA output created by $seq
+
+tmp=/tmp/$$
+status=1   # failure is the default!
+trap _cleanup; exit \$status 0 1 2 3 15
+
+_cleanup()
+{
+rm -fr $tmp
+}
+
+# get standard environment, filters and checks
+. ./common/rc
+. ./common/filter
+
+# real QA test starts here
+_supported_fs btrfs
+_supported_os Linux
+_require_scratch
+_require_btrfs_cloner
+_need_to_be_root
+
+rm -f $seqres.full
+
+test_btrfs_clone_with_holes()
+{
+   _scratch_mkfs $1 /dev/null 21
+   _scratch_mount
+
+   # Create a file with 4 extents and 1 hole, all with a size of 8Kb each.
+   $XFS_IO_PROG -f -c pwrite -S 0x01 -b 8192 0 8192 $SCRATCH_MNT/foo \
+   | _filter_xfs_io
+   sync
+   $XFS_IO_PROG -c pwrite -S 0x02 -b 8192 8192 8192 $SCRATCH_MNT/foo \
+   | _filter_xfs_io
+   sync
+   # After the following write we get a hole in the range [16384, 24576[
+   $XFS_IO_PROG -c pwrite -S 0x04 -b 8192 24576 8192 $SCRATCH_MNT/foo \
+   | _filter_xfs_io
+   sync
+   $XFS_IO_PROG -c pwrite -S 0x05 -b 8192 32768 8192 $SCRATCH_MNT/foo \
+   | _filter_xfs_io
+   sync
+
+   # Clone destination file, 1 extent of 96kb.
+   $XFS_IO_PROG -f -c pwrite -S 0xff -b 98304 0 98304 $SCRATCH_MNT/bar \
+   | _filter_xfs_io
+   sync
+
+   # Clone 2nd extent, 8Kb hole and 3rd extent of foo into bar.
+   $CLONER_PROG -s 8192 -d 0 -l 24576 $SCRATCH_MNT/foo $SCRATCH_MNT/bar
+
+   # Verify both extents and the hole were cloned.
+   od -t x1 $SCRATCH_MNT/bar
+
+   # Cloning range starts at the middle of a hole.
+   $CLONER_PROG -s 20480 -d 32768 -l 12288 $SCRATCH_MNT/foo 
$SCRATCH_MNT/bar
+
+   # Verify that half of the hole and the following 8Kb extent were cloned.
+   od -t x1 $SCRATCH_MNT/bar
+
+   # Cloning range ends at the middle of a hole.
+   $CLONER_PROG -s 0 -d 65536 -l 20480 $SCRATCH_MNT/foo $SCRATCH_MNT/bar
+
+   # Verify that 2 extents of 8kb and a 4kb hole were cloned.
+   od -t x1 $SCRATCH_MNT/bar
+
+   # Verify that there are no consistency errors.
+   _check_scratch_fs
+}
+
+echo Testing without the NO_HOLES feature
+test_btrfs_clone_with_holes
+
+_scratch_unmount
+
+echo Testing with the NO_HOLES feature enabled
+test_btrfs_clone_with_holes -O no-holes
+
+status=0
+exit
diff --git a/tests/btrfs/055.out b/tests/btrfs/055.out
new file mode 100644
index 000..cd627ce
--- /dev/null
+++ b/tests/btrfs/055.out
@@ -0,0 +1,117 @@
+QA output created by 055
+Testing without the NO_HOLES feature
+wrote 8192/8192 bytes at offset 0
+XXX Bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+wrote 8192/8192 bytes at offset 8192
+XXX

[PATCH v2] Btrfs: fix clone to deal with holes when NO_HOLES feature is enabled

2014-05-31 Thread Filipe David Borba Manana
If the NO_HOLES feature is enabled holes don't have file extent items in
the btree that represent them anymore. This made the clone operation
ignore the gaps that exist between consecutive file extent items and
therefore not create the holes at the destination.

A test case for xfstests follows.

Signed-off-by: Filipe David Borba Manana fdman...@gmail.com
---

V2: Deal with holes at the boundaries of the cloning range and that
either overlap the boundary completely or partially.
Test case for xfstests updated too to test these 2 cases.

 fs/btrfs/ioctl.c | 54 +-
 1 file changed, 49 insertions(+), 5 deletions(-)

diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 04ece8f..4a7a311 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -2995,7 +2995,8 @@ out:
  * @destoff: Offset within @inode to start clone
  */
 static int btrfs_clone(struct inode *src, struct inode *inode,
-  u64 off, u64 olen, u64 olen_aligned, u64 destoff)
+  const u64 off, const u64 olen, const u64 olen_aligned,
+  const u64 destoff)
 {
struct btrfs_root *root = BTRFS_I(inode)-root;
struct btrfs_path *path = NULL;
@@ -3007,8 +3008,10 @@ static int btrfs_clone(struct inode *src, struct inode 
*inode,
int slot;
int ret;
int no_quota;
-   u64 len = olen_aligned;
+   const u64 len = olen_aligned;
u64 last_disko = 0;
+   u64 last_dest_end = destoff;
+   bool add_trailing_hole = false;
 
ret = -ENOMEM;
buf = vmalloc(btrfs_level_size(root, 0));
@@ -3077,6 +3080,7 @@ process_slot:
u64 datao = 0, datal = 0;
u8 comp;
u64 endoff;
+   u64 drop_start;
 
extent = btrfs_item_ptr(leaf, slot,
struct btrfs_file_extent_item);
@@ -3106,7 +3110,20 @@ process_slot:
path-slots[0]++;
goto process_slot;
} else if (key.offset = off + len) {
-   break;
+   if (last_dest_end  destoff + len) {
+   /*
+* We have an implicit hole (NO_HOLES
+* feature is enabled) that fully or
+* partially overlaps our cloning range
+* at its end.
+*/
+   btrfs_release_path(path);
+   path-leave_spinning = 0;
+   add_trailing_hole = true;
+   goto start_trans;
+   } else {
+   break;
+   }
}
 
size = btrfs_item_size_nr(leaf, slot);
@@ -3125,6 +3142,19 @@ process_slot:
new_key.offset = destoff;
 
/*
+* Deal with a hole that doesn't have an extent item
+* that represents it (NO_HOLES feature enabled).
+* This hole is either in the middle of the cloning
+* range or at the beginning (fully overlaps it or
+* partially overlaps it).
+*/
+   if (new_key.offset != last_dest_end)
+   drop_start = last_dest_end;
+   else
+   drop_start = new_key.offset;
+
+start_trans:
+   /*
 * 1 - adjusting old extent (we may have to split it)
 * 1 - add new extent
 * 1 - inode update
@@ -3135,6 +3165,19 @@ process_slot:
goto out;
}
 
+   if (add_trailing_hole) {
+   ret = btrfs_drop_extents(trans, root, inode,
+last_dest_end,
+destoff + len, 1);
+   if (ret  ret != -EOPNOTSUPP)
+   btrfs_abort_transaction(trans, root,
+   ret);
+   btrfs_end_transaction(trans, root);
+   if (ret)
+   goto out;
+   break;
+   }
+
if (type == BTRFS_FILE_EXTENT_REG ||
type == BTRFS_FILE_EXTENT_PREALLOC

[PATCH v3] Btrfs: fix clone to deal with holes when NO_HOLES feature is enabled

2014-05-31 Thread Filipe David Borba Manana
If the NO_HOLES feature is enabled holes don't have file extent items in
the btree that represent them anymore. This made the clone operation
ignore the gaps that exist between consecutive file extent items and
therefore not create the holes at the destination.

A test case for xfstests follows.

Signed-off-by: Filipe David Borba Manana fdman...@gmail.com
---

V2: Deal with holes at the boundaries of the cloning range and that
either overlap the boundary completely or partially.
Test case for xfstests updated too to test these 2 cases.

V3: Deal with the case where the cloning range overlaps (partially or
completely) a hole at the end of the source file, and might increase
the size of the target file.
Updated the test for xfstests to cover these cases too.

 fs/btrfs/ioctl.c | 63 
 1 file changed, 59 insertions(+), 4 deletions(-)

diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 04ece8f..f508f5e 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -2995,7 +2995,8 @@ out:
  * @destoff: Offset within @inode to start clone
  */
 static int btrfs_clone(struct inode *src, struct inode *inode,
-  u64 off, u64 olen, u64 olen_aligned, u64 destoff)
+  const u64 off, const u64 olen, const u64 olen_aligned,
+  const u64 destoff)
 {
struct btrfs_root *root = BTRFS_I(inode)-root;
struct btrfs_path *path = NULL;
@@ -3007,8 +3008,9 @@ static int btrfs_clone(struct inode *src, struct inode 
*inode,
int slot;
int ret;
int no_quota;
-   u64 len = olen_aligned;
+   const u64 len = olen_aligned;
u64 last_disko = 0;
+   u64 last_dest_end = destoff;
 
ret = -ENOMEM;
buf = vmalloc(btrfs_level_size(root, 0));
@@ -3077,6 +3079,7 @@ process_slot:
u64 datao = 0, datal = 0;
u8 comp;
u64 endoff;
+   u64 drop_start;
 
extent = btrfs_item_ptr(leaf, slot,
struct btrfs_file_extent_item);
@@ -3125,6 +3128,18 @@ process_slot:
new_key.offset = destoff;
 
/*
+* Deal with a hole that doesn't have an extent item
+* that represents it (NO_HOLES feature enabled).
+* This hole is either in the middle of the cloning
+* range or at the beginning (fully overlaps it or
+* partially overlaps it).
+*/
+   if (new_key.offset != last_dest_end)
+   drop_start = last_dest_end;
+   else
+   drop_start = new_key.offset;
+
+   /*
 * 1 - adjusting old extent (we may have to split it)
 * 1 - add new extent
 * 1 - inode update
@@ -3153,7 +3168,7 @@ process_slot:
}
 
ret = btrfs_drop_extents(trans, root, inode,
-new_key.offset,
+drop_start,
 new_key.offset + datal,
 1);
if (ret) {
@@ -3254,7 +3269,7 @@ process_slot:
aligned_end = ALIGN(new_key.offset + datal,
root-sectorsize);
ret = btrfs_drop_extents(trans, root, inode,
-new_key.offset,
+drop_start,
 aligned_end,
 1);
if (ret) {
@@ -3301,6 +3316,7 @@ process_slot:
 * but shouldn't round up the file size
 */
endoff = new_key.offset + datal;
+   last_dest_end = endoff;
if (endoff  destoff+olen)
endoff = destoff+olen;
if (endoff  inode-i_size)
@@ -3321,6 +3337,45 @@ process_slot:
}
ret = 0;
 
+   if (last_dest_end  destoff + len) {
+   /*
+* We have an implicit hole (NO_HOLES feature is enabled) that
+* fully or partially overlaps our cloning range at its end.
+*/
+   btrfs_release_path(path);
+   path-leave_spinning = 0;
+
+   /*
+* 1 - remove extent(s)
+* 1

[PATCH v3] xfstests: add test for btrfs cloning with file holes

2014-05-31 Thread Filipe David Borba Manana
Regression test for the btrfs ioctl clone operation when the source range
contains hole(s) and the FS has the NO_HOLES feature enabled (file holes
don't need file extent items in the btree to represent them).

This issue is fixed by the following linux kernel btrfs patch:

Btrfs: fix clone to deal with holes when NO_HOLES feature is enabled

Signed-off-by: Filipe David Borba Manana fdman...@gmail.com
---

V2: Increased test coverage by testing the cases where a hole overlaps
the start and end of the cloning range.

V3: Test the case where the cloning range includes a hole at the end
of the source file and might increase the size of the target file.

 tests/btrfs/055 | 141 
 tests/btrfs/055.out | 225 
 tests/btrfs/group   |   1 +
 3 files changed, 367 insertions(+)
 create mode 100755 tests/btrfs/055
 create mode 100644 tests/btrfs/055.out

diff --git a/tests/btrfs/055 b/tests/btrfs/055
new file mode 100755
index 000..fad4b1c
--- /dev/null
+++ b/tests/btrfs/055
@@ -0,0 +1,141 @@
+#! /bin/bash
+# FS QA Test No. btrfs/055
+#
+# Regression test for the btrfs ioctl clone operation when the source range
+# contains hole(s) and the FS has the NO_HOLES feature enabled (file holes
+# don't need file extent items in the btree to represent them).
+#
+# This issue is fixed by the following linux kernel btrfs patch:
+#
+#Btrfs: fix clone to deal with holes when NO_HOLES feature is enabled
+#
+#---
+# Copyright (c) 2014 Filipe Manana.  All Rights Reserved.
+#
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License as
+# published by the Free Software Foundation.
+#
+# This program is distributed in the hope that it would be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write the Free Software Foundation,
+# Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+#---
+#
+
+seq=`basename $0`
+seqres=$RESULT_DIR/$seq
+echo QA output created by $seq
+
+tmp=/tmp/$$
+status=1   # failure is the default!
+trap _cleanup; exit \$status 0 1 2 3 15
+
+_cleanup()
+{
+rm -fr $tmp
+}
+
+# get standard environment, filters and checks
+. ./common/rc
+. ./common/filter
+
+# real QA test starts here
+_supported_fs btrfs
+_supported_os Linux
+_require_scratch
+_require_btrfs_cloner
+_need_to_be_root
+
+rm -f $seqres.full
+
+test_btrfs_clone_with_holes()
+{
+   _scratch_mkfs $1 /dev/null 21
+   _scratch_mount
+
+   # Create a file with 4 extents and 1 hole, all with a size of 8Kb each.
+   $XFS_IO_PROG -f -c pwrite -S 0x01 -b 8192 0 8192 $SCRATCH_MNT/foo \
+   | _filter_xfs_io
+   sync
+   $XFS_IO_PROG -c pwrite -S 0x02 -b 8192 8192 8192 $SCRATCH_MNT/foo \
+   | _filter_xfs_io
+   sync
+   # After the following write we get a hole in the range [16384, 24576[
+   $XFS_IO_PROG -c pwrite -S 0x04 -b 8192 24576 8192 $SCRATCH_MNT/foo \
+   | _filter_xfs_io
+   sync
+   $XFS_IO_PROG -c pwrite -S 0x05 -b 8192 32768 8192 $SCRATCH_MNT/foo \
+   | _filter_xfs_io
+   sync
+
+   # Clone destination file, 1 extent of 96kb.
+   $XFS_IO_PROG -f -c pwrite -S 0xff -b 98304 0 98304 $SCRATCH_MNT/bar \
+   | _filter_xfs_io
+   sync
+
+   # Clone 2nd extent, 8Kb hole and 3rd extent of foo into bar.
+   $CLONER_PROG -s 8192 -d 0 -l 24576 $SCRATCH_MNT/foo $SCRATCH_MNT/bar
+
+   # Verify both extents and the hole were cloned.
+   od -t x1 $SCRATCH_MNT/bar
+
+   # Cloning range starts at the middle of a hole.
+   $CLONER_PROG -s 20480 -d 32768 -l 12288 $SCRATCH_MNT/foo \
+   $SCRATCH_MNT/bar
+
+   # Verify that half of the hole and the following 8Kb extent were cloned.
+   od -t x1 $SCRATCH_MNT/bar
+
+   # Cloning range ends at the middle of a hole.
+   $CLONER_PROG -s 0 -d 65536 -l 20480 $SCRATCH_MNT/foo $SCRATCH_MNT/bar
+
+   # Verify that 2 extents of 8kb and a 4kb hole were cloned.
+   od -t x1 $SCRATCH_MNT/bar
+
+   # Create a 24Kb hole at the end of the source file (foo).
+   $XFS_IO_PROG -c truncate 65536 $SCRATCH_MNT/foo
+   sync
+
+   # Now clone a range that overlaps that hole at the end of the foo file.
+   # It should clone the last 4Kb of the extent at offset 32768 and the
+   # first 8kb of the 24kb hole at the end of foo.
+   $CLONER_PROG -s 36864 -d 86016 -l 12288 $SCRATCH_MNT/foo \
+   $SCRATCH_MNT/bar
+
+   # Verify that the second half of the 8Kb extent

[PATCH v4] Btrfs: fix clone to deal with holes when NO_HOLES feature is enabled

2014-05-31 Thread Filipe David Borba Manana
If the NO_HOLES feature is enabled holes don't have file extent items in
the btree that represent them anymore. This made the clone operation
ignore the gaps that exist between consecutive file extent items and
therefore not create the holes at the destination. When not using the
NO_HOLES feature, the holes were created at the destination.

A test case for xfstests follows.

Signed-off-by: Filipe David Borba Manana fdman...@gmail.com
---

V2: Deal with holes at the boundaries of the cloning range and that
either overlap the boundary completely or partially.
Test case for xfstests updated too to test these 2 cases.

V3: Deal with the case where the cloning range overlaps (partially or
completely) a hole at the end of the source file, and might increase
the size of the target file.
Updated the test for xfstests to cover these cases too.

V4: Moved some duplicated code into a helper function.

 fs/btrfs/ioctl.c | 108 ++-
 1 file changed, 83 insertions(+), 25 deletions(-)

diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 04ece8f..95194a9 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -2983,6 +2983,37 @@ out:
return ret;
 }
 
+static int clone_finish_inode_update(struct btrfs_trans_handle *trans,
+struct inode *inode,
+u64 endoff,
+const u64 destoff,
+const u64 olen)
+{
+   struct btrfs_root *root = BTRFS_I(inode)-root;
+   int ret;
+
+   inode_inc_iversion(inode);
+   inode-i_mtime = inode-i_ctime = CURRENT_TIME;
+   /*
+* We round up to the block size at eof when determining which
+* extents to clone above, but shouldn't round up the file size.
+*/
+   if (endoff  destoff + olen)
+   endoff = destoff + olen;
+   if (endoff  inode-i_size)
+   btrfs_i_size_write(inode, endoff);
+
+   ret = btrfs_update_inode(trans, root, inode);
+   if (ret) {
+   btrfs_abort_transaction(trans, root, ret);
+   btrfs_end_transaction(trans, root);
+   goto out;
+   }
+   ret = btrfs_end_transaction(trans, root);
+out:
+   return ret;
+}
+
 /**
  * btrfs_clone() - clone a range from inode file to another
  *
@@ -2995,7 +3026,8 @@ out:
  * @destoff: Offset within @inode to start clone
  */
 static int btrfs_clone(struct inode *src, struct inode *inode,
-  u64 off, u64 olen, u64 olen_aligned, u64 destoff)
+  const u64 off, const u64 olen, const u64 olen_aligned,
+  const u64 destoff)
 {
struct btrfs_root *root = BTRFS_I(inode)-root;
struct btrfs_path *path = NULL;
@@ -3007,8 +3039,9 @@ static int btrfs_clone(struct inode *src, struct inode 
*inode,
int slot;
int ret;
int no_quota;
-   u64 len = olen_aligned;
+   const u64 len = olen_aligned;
u64 last_disko = 0;
+   u64 last_dest_end = destoff;
 
ret = -ENOMEM;
buf = vmalloc(btrfs_level_size(root, 0));
@@ -3076,7 +3109,7 @@ process_slot:
u64 disko = 0, diskl = 0;
u64 datao = 0, datal = 0;
u8 comp;
-   u64 endoff;
+   u64 drop_start;
 
extent = btrfs_item_ptr(leaf, slot,
struct btrfs_file_extent_item);
@@ -3125,6 +3158,18 @@ process_slot:
new_key.offset = destoff;
 
/*
+* Deal with a hole that doesn't have an extent item
+* that represents it (NO_HOLES feature enabled).
+* This hole is either in the middle of the cloning
+* range or at the beginning (fully overlaps it or
+* partially overlaps it).
+*/
+   if (new_key.offset != last_dest_end)
+   drop_start = last_dest_end;
+   else
+   drop_start = new_key.offset;
+
+   /*
 * 1 - adjusting old extent (we may have to split it)
 * 1 - add new extent
 * 1 - inode update
@@ -3153,7 +3198,7 @@ process_slot:
}
 
ret = btrfs_drop_extents(trans, root, inode,
-new_key.offset,
+drop_start,
 new_key.offset + datal,
 1);
if (ret) {
@@ -3254,7 +3299,7 @@ process_slot

[PATCH] Btrfs: avoid visiting all extent items when cloning a range

2014-05-30 Thread Filipe David Borba Manana
When cloning a range of a file, we were visiting all the extent items in
the btree that belong to our source inode. We don't need to visit those
extent items that don't overlap the range we are cloning, as doing so only
makes us waste time and do unnecessary btree navigations (btrfs_next_leaf)
for inodes that have a large number of file extent items in the btree.

Signed-off-by: Filipe David Borba Manana fdman...@gmail.com
---
 fs/btrfs/ioctl.c | 23 ---
 1 file changed, 20 insertions(+), 3 deletions(-)

diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 603c036..f20d91d 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -3025,7 +3025,7 @@ static int btrfs_clone(struct inode *src, struct inode 
*inode,
/* clone data */
key.objectid = btrfs_ino(src);
key.type = BTRFS_EXTENT_DATA_KEY;
-   key.offset = 0;
+   key.offset = off;
 
while (1) {
/*
@@ -3037,6 +3037,17 @@ static int btrfs_clone(struct inode *src, struct inode 
*inode,
0, 0);
if (ret  0)
goto out;
+   /*
+* First search, if no extent item that starts at offset off was
+* found but the previous item is an extent item, it's possible
+* it might overlap our target range, therefore process it.
+*/
+   if (key.offset == off  ret  0  path-slots[0]  0) {
+   btrfs_item_key_to_cpu(path-nodes[0], key,
+ path-slots[0] - 1);
+   if (key.type == BTRFS_EXTENT_DATA_KEY)
+   path-slots[0]--;
+   }
 
nritems = btrfs_header_nritems(path-nodes[0]);
 process_slot:
@@ -3086,10 +3097,16 @@ process_slot:
extent);
}
 
-   if (key.offset + datal = off ||
-   key.offset = off + len - 1) {
+   /*
+* The first search might have left us at an extent
+* item that ends before our target range's start, can
+* happen if we have holes and NO_HOLES feature enabled.
+*/
+   if (key.offset + datal = off) {
path-slots[0]++;
goto process_slot;
+   } else if (key.offset = off + len) {
+   break;
}
 
size = btrfs_item_size_nr(leaf, slot);
-- 
1.9.1

--
To unsubscribe from this list: send the line unsubscribe linux-btrfs in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH] Btrfs: ioctl, don't re-lock extent range when not necessary

2014-05-30 Thread Filipe David Borba Manana
In ioctl.c:lock_extent_range(), after locking our target range, the
ordered extent that btrfs_lookup_first_ordered_extent() returns us
may not overlap our target range at all. In this case we would just
unlock our target range, wait for any new ordered extents that overlap
the range to complete, lock the range again and repeat all these steps
until we don't get any ordered extent and the delalloc flag isn't set
in the io tree for our target range.

Therefore just stop if we get an ordered extent that doesn't overlap
our target range and the delalloc flag isn't set for the range in
the inode's io tree.

Signed-off-by: Filipe David Borba Manana fdman...@gmail.com
---
 fs/btrfs/ioctl.c | 9 +++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 38f2169..603c036 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -2700,10 +2700,15 @@ static inline void lock_extent_range(struct inode 
*inode, u64 off, u64 len)
lock_extent(BTRFS_I(inode)-io_tree, off, off + len - 1);
ordered = btrfs_lookup_first_ordered_extent(inode,
off + len - 1);
-   if (!ordered 
+   if ((!ordered ||
+ordered-file_offset + ordered-len = off ||
+ordered-file_offset = off + len) 
!test_range_bit(BTRFS_I(inode)-io_tree, off,
-   off + len - 1, EXTENT_DELALLOC, 0, NULL))
+   off + len - 1, EXTENT_DELALLOC, 0, NULL)) {
+   if (ordered)
+   btrfs_put_ordered_extent(ordered);
break;
+   }
unlock_extent(BTRFS_I(inode)-io_tree, off, off + len - 1);
if (ordered)
btrfs_put_ordered_extent(ordered);
-- 
1.9.1

--
To unsubscribe from this list: send the line unsubscribe linux-btrfs in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH] Btrfs: fix clone to deal with holes when NO_HOLES feature is enabled

2014-05-30 Thread Filipe David Borba Manana
If the NO_HOLES feature is enabled holes don't have file extent items in
the btree that represent them anymore. This made the clone operation
ignore the gaps that exist between consecutive file extent items and
therefore not create the holes at the destination.

A test case for xfstests follows.

Signed-off-by: Filipe David Borba Manana fdman...@gmail.com
---
 fs/btrfs/ioctl.c | 17 +++--
 1 file changed, 15 insertions(+), 2 deletions(-)

diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index ecf56af..bf34b7a 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -3009,6 +3009,7 @@ static int btrfs_clone(struct inode *src, struct inode 
*inode,
int no_quota;
u64 len = olen_aligned;
u64 last_disko = 0;
+   u64 last_dest_end = (u64)-1;
 
ret = -ENOMEM;
buf = vmalloc(btrfs_level_size(root, 0));
@@ -3077,6 +3078,7 @@ process_slot:
u64 datao = 0, datal = 0;
u8 comp;
u64 endoff;
+   u64 drop_start;
 
extent = btrfs_item_ptr(leaf, slot,
struct btrfs_file_extent_item);
@@ -3125,6 +3127,16 @@ process_slot:
new_key.offset = destoff;
 
/*
+* Deal with a hole that doesn't have an extent item
+* that represents it (NO_HOLES feature enabled).
+*/
+   if (last_dest_end != (u64)-1 
+   new_key.offset != last_dest_end)
+   drop_start = last_dest_end;
+   else
+   drop_start = new_key.offset;
+
+   /*
 * 1 - adjusting old extent (we may have to split it)
 * 1 - add new extent
 * 1 - inode update
@@ -3153,7 +3165,7 @@ process_slot:
}
 
ret = btrfs_drop_extents(trans, root, inode,
-new_key.offset,
+drop_start,
 new_key.offset + datal,
 1);
if (ret) {
@@ -3254,7 +3266,7 @@ process_slot:
aligned_end = ALIGN(new_key.offset + datal,
root-sectorsize);
ret = btrfs_drop_extents(trans, root, inode,
-new_key.offset,
+drop_start,
 aligned_end,
 1);
if (ret) {
@@ -3301,6 +3313,7 @@ process_slot:
 * but shouldn't round up the file size
 */
endoff = new_key.offset + datal;
+   last_dest_end = endoff;
if (endoff  destoff+olen)
endoff = destoff+olen;
if (endoff  inode-i_size)
-- 
1.9.1

--
To unsubscribe from this list: send the line unsubscribe linux-btrfs in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v3] Btrfs: avoid visiting all extent items when cloning a range

2014-05-30 Thread Filipe David Borba Manana
When cloning a range of a file, we were visiting all the extent items in
the btree that belong to our source inode. We don't need to visit those
extent items that don't overlap the range we are cloning, as doing so only
makes us waste time and do unnecessary btree navigations (btrfs_next_leaf)
for inodes that have a large number of file extent items in the btree.

Signed-off-by: Filipe David Borba Manana fdman...@gmail.com
---

V2: Leave the loop as soon as we process the last extent, to avoid
one extra and unnecessary btree search.

V3: Removed unintentional and unrelated change from v2.

 fs/btrfs/ioctl.c | 26 ++
 1 file changed, 22 insertions(+), 4 deletions(-)

diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 603c036..04ece8f 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -3025,7 +3025,7 @@ static int btrfs_clone(struct inode *src, struct inode 
*inode,
/* clone data */
key.objectid = btrfs_ino(src);
key.type = BTRFS_EXTENT_DATA_KEY;
-   key.offset = 0;
+   key.offset = off;
 
while (1) {
/*
@@ -3037,6 +3037,17 @@ static int btrfs_clone(struct inode *src, struct inode 
*inode,
0, 0);
if (ret  0)
goto out;
+   /*
+* First search, if no extent item that starts at offset off was
+* found but the previous item is an extent item, it's possible
+* it might overlap our target range, therefore process it.
+*/
+   if (key.offset == off  ret  0  path-slots[0]  0) {
+   btrfs_item_key_to_cpu(path-nodes[0], key,
+ path-slots[0] - 1);
+   if (key.type == BTRFS_EXTENT_DATA_KEY)
+   path-slots[0]--;
+   }
 
nritems = btrfs_header_nritems(path-nodes[0]);
 process_slot:
@@ -3086,10 +3097,16 @@ process_slot:
extent);
}
 
-   if (key.offset + datal = off ||
-   key.offset = off + len - 1) {
+   /*
+* The first search might have left us at an extent
+* item that ends before our target range's start, can
+* happen if we have holes and NO_HOLES feature enabled.
+*/
+   if (key.offset + datal = off) {
path-slots[0]++;
goto process_slot;
+   } else if (key.offset = off + len) {
+   break;
}
 
size = btrfs_item_size_nr(leaf, slot);
@@ -3296,6 +3313,8 @@ process_slot:
goto out;
}
ret = btrfs_end_transaction(trans, root);
+   if (new_key.offset + datal = destoff + len)
+   break;
}
btrfs_release_path(path);
key.offset++;
@@ -3303,7 +3322,6 @@ process_slot:
ret = 0;
 
 out:
-   btrfs_release_path(path);
btrfs_free_path(path);
vfree(buf);
return ret;
-- 
1.9.1

--
To unsubscribe from this list: send the line unsubscribe linux-btrfs in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH] Btrfs: fix transaction leak during fsync call

2014-05-29 Thread Filipe David Borba Manana
If btrfs_log_dentry_safe() returns an error, we set ret to 1 and
fall through with the goal of committing the transaction. However,
in the case where the inode doesn't need a full sync, we would call
btrfs_wait_ordered_range() against the target range for our inode,
and if it returned an error, we would return without committing or
ending the transaction, leaving the transaction open forever.

Since when btrfs_wait_ordered_range() doesn't return an error we
commit the transaction, it doesn't make sense to make a call to
btrfs_wait_ordered_range() because committing the transaction will
wait for all ordered extents to complete anyway.

Signed-off-by: Filipe David Borba Manana fdman...@gmail.com
---
 fs/btrfs/file.c | 6 --
 1 file changed, 6 deletions(-)

diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 8accf94..e0be468 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -2022,12 +2022,6 @@ int btrfs_sync_file(struct file *file, loff_t start, 
loff_t end, int datasync)
goto out;
}
}
-   if (!full_sync) {
-   ret = btrfs_wait_ordered_range(inode, start,
-  end - start + 1);
-   if (ret)
-   goto out;
-   }
ret = btrfs_commit_transaction(trans, root);
} else {
ret = btrfs_end_transaction(trans, root);
-- 
1.9.1

--
To unsubscribe from this list: send the line unsubscribe linux-btrfs in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v2] Btrfs: fix transaction leak during fsync call

2014-05-29 Thread Filipe David Borba Manana
If btrfs_log_dentry_safe() returns an error, we set ret to 1 and
fall through with the goal of committing the transaction. However,
in the case where the inode doesn't need a full sync, we would call
btrfs_wait_ordered_range() against the target range for our inode,
and if it returned an error, we would return without committing or
ending the transaction.

Signed-off-by: Filipe David Borba Manana fdman...@gmail.com
---

V2: Leave the call to btrfs_wait_ordered_range(), it's needed if
the fs is mounted with -o no_flushoncommit.

 fs/btrfs/file.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index e46bfaf..5a00597 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -2025,8 +2025,10 @@ int btrfs_sync_file(struct file *file, loff_t start, 
loff_t end, int datasync)
if (!full_sync) {
ret = btrfs_wait_ordered_range(inode, start,
   end - start + 1);
-   if (ret)
+   if (ret) {
+   btrfs_end_transaction(trans, root);
goto out;
+   }
}
ret = btrfs_commit_transaction(trans, root);
} else {
-- 
1.9.1

--
To unsubscribe from this list: send the line unsubscribe linux-btrfs in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 2/2 v3] xfstests: add test for btrfs ioctl clone operation

2014-05-24 Thread Filipe David Borba Manana
This is a test to verify that the btrfs ioctl clone operation is
able to clone extents of a file to different positions of the file,
that is, the source and target files are the same. Existing tests
only cover the case where the source and target files are different.

Signed-off-by: Filipe David Borba Manana fdman...@gmail.com
---

V2: Made the test exercise a more complex code path in the btrfs ioctl clone
code. Now we have extents with different sizes and make the cloner process
partial extents and split existing extents with smaller ones.

V3: Add tests to verify that after writing to a cloned extent, the original
extent isn't modified, that defragmenting a file with cloned extents
doesn't change the file contents and that all the tests have the same
exact semantics (as observed by an application/user) regardless of the
following options (and any combination): cow/nodatacow/compression.

 tests/btrfs/052 | 171 ++
 tests/btrfs/052.out | 499 
 tests/btrfs/group   |   1 +
 3 files changed, 671 insertions(+)
 create mode 100755 tests/btrfs/052
 create mode 100644 tests/btrfs/052.out

diff --git a/tests/btrfs/052 b/tests/btrfs/052
new file mode 100755
index 000..671034e
--- /dev/null
+++ b/tests/btrfs/052
@@ -0,0 +1,171 @@
+#! /bin/bash
+# FS QA Test No. btrfs/052
+#
+# Verify that the btrfs ioctl clone operation can operate on the same
+# file as a source and target. That is, clone extents within the same
+# file.
+#
+#---
+# Copyright (c) 2014 Filipe Manana.  All Rights Reserved.
+#
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License as
+# published by the Free Software Foundation.
+#
+# This program is distributed in the hope that it would be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write the Free Software Foundation,
+# Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+#---
+#
+
+seq=`basename $0`
+seqres=$RESULT_DIR/$seq
+echo QA output created by $seq
+
+tmp=/tmp/$$
+status=1   # failure is the default!
+trap _cleanup; exit \$status 0 1 2 3 15
+
+_cleanup()
+{
+rm -fr $tmp
+}
+
+# get standard environment, filters and checks
+. ./common/rc
+. ./common/filter
+
+# real QA test starts here
+_supported_fs btrfs
+_supported_os Linux
+_require_scratch
+_require_btrfs_cloner
+_need_to_be_root
+
+rm -f $seqres.full
+
+test_btrfs_clone_same_file()
+{
+   if [ -z $1 ]; then
+   MOUNT_OPTIONS=
+   else
+   MOUNT_OPTIONS=-O $1
+   fi
+   _scratch_mkfs /dev/null 21
+   _scratch_mount $MOUNT_OPTIONS
+
+   # Create a file with 5 extents, 4 of 8Kb each and 1 of 64Kb.
+   $XFS_IO_PROG -f -c pwrite -S 0x01 -b 8192 0 8192 $SCRATCH_MNT/foo \
+   | _filter_xfs_io
+   sync
+   $XFS_IO_PROG -c pwrite -S 0x02 -b 8192 8192 8192 $SCRATCH_MNT/foo \
+   | _filter_xfs_io
+   sync
+   $XFS_IO_PROG -c pwrite -S 0x03 -b 8192 16384 8192 $SCRATCH_MNT/foo \
+   | _filter_xfs_io
+   sync
+   $XFS_IO_PROG -c pwrite -S 0x04 -b 8192 24576 8192 $SCRATCH_MNT/foo \
+   | _filter_xfs_io
+   sync
+   $XFS_IO_PROG -c pwrite -S 0x05 -b 65536 32768 65536 $SCRATCH_MNT/foo \
+   | _filter_xfs_io
+   sync
+
+   # Digest of initial content.
+   md5sum $SCRATCH_MNT/foo | _filter_scratch
+
+   # Same source and target ranges - must fail.
+   $CLONER_PROG -s 8192 -d 8192 -l 8192 $SCRATCH_MNT/foo $SCRATCH_MNT/foo
+   # Check file content didn't change.
+   md5sum $SCRATCH_MNT/foo | _filter_scratch
+
+   # Intersection between source and target ranges - must fail too.
+   $CLONER_PROG -s 4096 -d 8192 -l 8192 $SCRATCH_MNT/foo $SCRATCH_MNT/foo
+   # Check file content didn't change.
+   md5sum $SCRATCH_MNT/foo | _filter_scratch
+
+   # Clone an entire extent from a higher range to a lower range.
+   $CLONER_PROG -s 24576 -d 0 -l 8192 $SCRATCH_MNT/foo $SCRATCH_MNT/foo
+
+   # Check entire file, the 8Kb block at offset 0 now has the same content
+   # as the 8Kb block at offset 24576.
+   od -t x1 $SCRATCH_MNT/foo
+
+   # Clone an entire extent from a lower range to a higher range.
+   $CLONER_PROG -s 8192 -d 16384 -l 8192 $SCRATCH_MNT/foo $SCRATCH_MNT/foo
+
+   # Check entire file, the 8Kb block at offset 0 now has the same content
+   # as the 8Kb block at offset 24576, and the 8Kb block at offset 16384
+   # now has the same content as the 8Kb block

[PATCH v2] xfstests: add test for btrfs send with large xattrs

2014-05-24 Thread Filipe David Borba Manana
Verify that btrfs send is able to replicate xattrs larger than
PATH_MAX. This is possible if the b+tree leaf size is larger
than 4Kb (mkfs.btrfs's default is max(16Kb, PAGE_SIZE) as of
btrfs-progs v3.12, and max(4Kb, PAGE_SIZE) in older versions).

This issue is fixed by the following linux kernel btrfs patch:

   Btrfs: send, use the right limits for xattr names and values

Signed-off-by: Filipe David Borba Manana fdman...@gmail.com
---

V2: Updated second invocation of btrfs send to be incremental.
This way we test both a full send (snapshot 1) and an incremental
send (differences between snapshot 2 and snapshot 1).

 tests/btrfs/053 | 109 
 tests/btrfs/053.out |   1 +
 tests/btrfs/group   |   1 +
 3 files changed, 111 insertions(+)
 create mode 100755 tests/btrfs/053
 create mode 100644 tests/btrfs/053.out

diff --git a/tests/btrfs/053 b/tests/btrfs/053
new file mode 100755
index 000..3994110
--- /dev/null
+++ b/tests/btrfs/053
@@ -0,0 +1,109 @@
+#! /bin/bash
+# FS QA Test No. btrfs/053
+#
+# Verify that btrfs send is able to replicate xattrs larger than PATH_MAX.
+# This is possible if the b+tree leaf size is larger than 4Kb (mkfs.btrfs's
+# default is max(16Kb, PAGE_SIZE) as of btrfs-progs v3.12, and max(4Kb,
+# PAGE_SIZE in older versions).
+#
+# This issue is fixed by the following linux kernel btrfs patch:
+#
+#   Btrfs: send, use the right limits for xattr names and values
+#
+#---
+# Copyright (c) 2014 Filipe Manana.  All Rights Reserved.
+#
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License as
+# published by the Free Software Foundation.
+#
+# This program is distributed in the hope that it would be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write the Free Software Foundation,
+# Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+#---
+#
+
+seq=`basename $0`
+seqres=$RESULT_DIR/$seq
+echo QA output created by $seq
+
+tmp=/tmp/$$
+status=1   # failure is the default!
+trap _cleanup; exit \$status 0 1 2 3 15
+
+_cleanup()
+{
+rm -fr $send_files_dir
+rm -fr $tmp
+}
+
+# get standard environment, filters and checks
+. ./common/rc
+. ./common/filter
+. ./common/attr
+
+# real QA test starts here
+_supported_fs btrfs
+_supported_os Linux
+_require_scratch
+_require_fssum
+_require_attrs
+_need_to_be_root
+
+# max(16384, PAGE_SIZE) is the default leaf/node size on btrfs-progs v3.12+.
+# Older versions just use max(4096, PAGE_SIZE).
+# mkfs.btrfs can't create an fs with a leaf/node size smaller than PAGE_SIZE.
+leaf_size=$(echo -e 16384\n`getconf PAGE_SIZE` | sort -nr | head -1)
+
+send_files_dir=$TEST_DIR/btrfs-test-$seq
+
+rm -f $seqres.full
+rm -fr $send_files_dir
+mkdir $send_files_dir
+
+_scratch_mkfs -l $leaf_size /dev/null 21
+_scratch_mount
+
+echo hello world  $SCRATCH_MNT/foobar
+
+$SETFATTR_PROG -n user.xattr_name_1 -v `$PERL_PROG -e 'print A x 6000;'` \
+   $SCRATCH_MNT/foobar
+
+_run_btrfs_util_prog subvolume snapshot -r $SCRATCH_MNT $SCRATCH_MNT/mysnap1
+run_check $FSSUM_PROG -A -f -w $send_files_dir/1.fssum $SCRATCH_MNT/mysnap1
+
+# Update existing xattr value and add a new xattr too.
+$SETFATTR_PROG -n user.xattr_name_1 -v `$PERL_PROG -e 'print Z x ;'` \
+   $SCRATCH_MNT/foobar
+$SETFATTR_PROG -n user.xattr_name_2 -v `$PERL_PROG -e 'print U x ;'` \
+   $SCRATCH_MNT/foobar
+
+_run_btrfs_util_prog subvolume snapshot -r $SCRATCH_MNT $SCRATCH_MNT/mysnap2
+run_check $FSSUM_PROG -A -f -w $send_files_dir/2.fssum \
+   -x $SCRATCH_MNT/mysnap2/mysnap1 $SCRATCH_MNT/mysnap2
+
+_run_btrfs_util_prog send $SCRATCH_MNT/mysnap1 -f $send_files_dir/1.snap
+_run_btrfs_util_prog send -p $SCRATCH_MNT/mysnap1 $SCRATCH_MNT/mysnap2 \
+   -f $send_files_dir/2.snap
+
+_scratch_unmount
+_check_scratch_fs
+
+_scratch_mkfs -l $leaf_size /dev/null 21
+_scratch_mount
+
+_run_btrfs_util_prog receive $SCRATCH_MNT -f $send_files_dir/1.snap
+run_check $FSSUM_PROG -r $send_files_dir/1.fssum $SCRATCH_MNT/mysnap1
+
+_run_btrfs_util_prog receive $SCRATCH_MNT -f $send_files_dir/2.snap
+run_check $FSSUM_PROG -r $send_files_dir/2.fssum $SCRATCH_MNT/mysnap2
+
+_check_scratch_fs
+
+status=0
+exit
diff --git a/tests/btrfs/053.out b/tests/btrfs/053.out
new file mode 100644
index 000..4c61638
--- /dev/null
+++ b/tests/btrfs/053.out
@@ -0,0 +1 @@
+QA output created by 053
diff --git a/tests/btrfs/group b/tests/btrfs/group
index 5ff9b8e..ea49c5c 100644
--- a/tests/btrfs/group
+++ b/tests/btrfs/group
@@ -55,3 +55,4 @@
 050 auto
 051 auto quick
 052 auto

[PATCH] Btrfs: send, don't error in the presence of subvols/snapshots

2014-05-24 Thread Filipe David Borba Manana
If we are doing an incremental send and the base snapshot has a
directory with name X that doesn't exist anymore in the second
snapshot and a new subvolume/snapshot exists in the second snapshot
that has the same name as the directory (name X), the incremental
send would fail with -ENOENT error. This is because it attempts
to look up an inode with a number matching the objectid of a
root, which doesn't exist.

Steps to reproduce:

mkfs.btrfs -f /dev/sdd
mount /dev/sdd /mnt

mkdir /mnt/testdir
btrfs subvolume snapshot -r /mnt /mnt/mysnap1

rmdir /mnt/testdir
btrfs subvolume create /mnt/testdir
btrfs subvolume snapshot -r /mnt /mnt/mysnap2

btrfs send -p /mnt/mysnap1 /mnt/mysnap2 -f /tmp/send.data

A test case for xfstests follows.

Reported-by: Robert White rwh...@pobox.com
Signed-off-by: Filipe David Borba Manana fdman...@gmail.com
---
 fs/btrfs/send.c | 12 
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c
index 1a65a40..f51525e 100644
--- a/fs/btrfs/send.c
+++ b/fs/btrfs/send.c
@@ -1642,7 +1642,8 @@ out:
 static int lookup_dir_item_inode(struct btrfs_root *root,
 u64 dir, const char *name, int name_len,
 u64 *found_inode,
-u8 *found_type)
+u8 *found_type,
+int *found_is_root)
 {
int ret = 0;
struct btrfs_dir_item *di;
@@ -1666,6 +1667,8 @@ static int lookup_dir_item_inode(struct btrfs_root *root,
btrfs_dir_item_key_to_cpu(path-nodes[0], di, key);
*found_inode = key.objectid;
*found_type = btrfs_dir_type(path-nodes[0], di);
+   if (found_is_root)
+   *found_is_root = (key.type == BTRFS_ROOT_ITEM_KEY);
 
 out:
btrfs_free_path(path);
@@ -1816,7 +1819,7 @@ static int will_overwrite_ref(struct send_ctx *sctx, u64 
dir, u64 dir_gen,
}
 
ret = lookup_dir_item_inode(sctx-parent_root, dir, name, name_len,
-   other_inode, other_type);
+   other_inode, other_type, NULL);
if (ret  0  ret != -ENOENT)
goto out;
if (ret) {
@@ -1861,6 +1864,7 @@ static int did_overwrite_ref(struct send_ctx *sctx,
u64 gen;
u64 ow_inode;
u8 other_type;
+   int other_is_root = 0;
 
if (!sctx-parent_root)
goto out;
@@ -1871,10 +1875,10 @@ static int did_overwrite_ref(struct send_ctx *sctx,
 
/* check if the ref was overwritten by another ref */
ret = lookup_dir_item_inode(sctx-send_root, dir, name, name_len,
-   ow_inode, other_type);
+   ow_inode, other_type, other_is_root);
if (ret  0  ret != -ENOENT)
goto out;
-   if (ret) {
+   if (ret || other_is_root) {
/* was never and will never be overwritten */
ret = 0;
goto out;
-- 
1.9.1

--
To unsubscribe from this list: send the line unsubscribe linux-btrfs in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH] Btrfs: set dead flag on the right root when destroying snapshot

2014-05-24 Thread Filipe David Borba Manana
We were setting the BTRFS_ROOT_SUBVOL_DEAD flag on the root of the
parent of our target snapshot, instead of setting it in the target
snapshot's root.

This is easy to observe by running the following scenario:

mkfs.btrfs -f /dev/sdd
mount /dev/sdd /mnt

btrfs subvolume create /mnt/first_subvol
btrfs subvolume snapshot -r /mnt /mnt/mysnap1

btrfs subvolume delete /mnt/first_subvol
btrfs subvolume snapshot -r /mnt /mnt/mysnap2

btrfs send -p /mnt/mysnap1 /mnt/mysnap2 -f /tmp/send.data

The send command failed because the send ioctl returned -EPERM.
A test case for xfstests follows.

Signed-off-by: Filipe David Borba Manana fdman...@gmail.com
---
 fs/btrfs/ioctl.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 362720a..482cad5 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -2314,7 +2314,7 @@ static noinline int btrfs_ioctl_snap_destroy(struct file 
*file,
spin_lock(dest-root_item_lock);
root_flags = btrfs_root_flags(root-root_item);
if (root-send_in_progress == 0) {
-   btrfs_set_root_flags(root-root_item,
+   btrfs_set_root_flags(dest-root_item,
root_flags | BTRFS_ROOT_SUBVOL_DEAD);
spin_unlock(dest-root_item_lock);
} else {
@@ -2417,7 +2417,7 @@ out_unlock:
if (err) {
spin_lock(dest-root_item_lock);
root_flags = btrfs_root_flags(root-root_item);
-   btrfs_set_root_flags(root-root_item,
+   btrfs_set_root_flags(dest-root_item,
root_flags  ~BTRFS_ROOT_SUBVOL_DEAD);
spin_unlock(dest-root_item_lock);
}
-- 
1.9.1

--
To unsubscribe from this list: send the line unsubscribe linux-btrfs in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v2] Btrfs: set dead flag on the right root when destroying snapshot

2014-05-24 Thread Filipe David Borba Manana
We were setting the BTRFS_ROOT_SUBVOL_DEAD flag on the root of the
parent of our target snapshot, instead of setting it in the target
snapshot's root.

This is easy to observe by running the following scenario:

mkfs.btrfs -f /dev/sdd
mount /dev/sdd /mnt

btrfs subvolume create /mnt/first_subvol
btrfs subvolume snapshot -r /mnt /mnt/mysnap1

btrfs subvolume delete /mnt/first_subvol
btrfs subvolume snapshot -r /mnt /mnt/mysnap2

btrfs send -p /mnt/mysnap1 /mnt/mysnap2 -f /tmp/send.data

The send command failed because the send ioctl returned -EPERM.
A test case for xfstests follows.

Signed-off-by: Filipe David Borba Manana fdman...@gmail.com
---

V2: Add missing replacements of 'root' with 'dest'.

 fs/btrfs/ioctl.c | 12 ++--
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 362720a..38f2169 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -2312,16 +2312,16 @@ static noinline int btrfs_ioctl_snap_destroy(struct 
file *file,
 * again is not run concurrently.
 */
spin_lock(dest-root_item_lock);
-   root_flags = btrfs_root_flags(root-root_item);
-   if (root-send_in_progress == 0) {
-   btrfs_set_root_flags(root-root_item,
+   root_flags = btrfs_root_flags(dest-root_item);
+   if (dest-send_in_progress == 0) {
+   btrfs_set_root_flags(dest-root_item,
root_flags | BTRFS_ROOT_SUBVOL_DEAD);
spin_unlock(dest-root_item_lock);
} else {
spin_unlock(dest-root_item_lock);
btrfs_warn(root-fs_info,
Attempt to delete subvolume %llu during send,
-   root-root_key.objectid);
+   dest-root_key.objectid);
err = -EPERM;
goto out_dput;
}
@@ -2416,8 +2416,8 @@ out_up_write:
 out_unlock:
if (err) {
spin_lock(dest-root_item_lock);
-   root_flags = btrfs_root_flags(root-root_item);
-   btrfs_set_root_flags(root-root_item,
+   root_flags = btrfs_root_flags(dest-root_item);
+   btrfs_set_root_flags(dest-root_item,
root_flags  ~BTRFS_ROOT_SUBVOL_DEAD);
spin_unlock(dest-root_item_lock);
}
-- 
1.9.1

--
To unsubscribe from this list: send the line unsubscribe linux-btrfs in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH] xfstests: test for btrfs send when nested subvols/snapshots exist

2014-05-24 Thread Filipe David Borba Manana
Regression test for a btrfs incremental send issue where the difference
between the snapshots used by the incremental send consists of one of
these cases:

1) First snapshot has a directory with name X and in the second snapshot
   that directory doesn't exist anymore but a subvolume/snapshot with
   the same name (X) exists;

2) First snapshot has a subvolume/snapshot with name X and in the second
   snapshot that subvolume/snapshot doesn't exist anymore (might have been
   replaced by a directory with the same name or not).

This issue is fixed by the following linux kernel btrfs patches:

Btrfs: send, don't error in the presence of subvols/snapshots
Btrfs: set dead flag on the right root when destroying snapshot

Signed-off-by: Filipe David Borba Manana fdman...@gmail.com
---
 tests/btrfs/054 | 109 
 tests/btrfs/054.out |   1 +
 tests/btrfs/group   |   1 +
 3 files changed, 111 insertions(+)
 create mode 100755 tests/btrfs/054
 create mode 100644 tests/btrfs/054.out

diff --git a/tests/btrfs/054 b/tests/btrfs/054
new file mode 100755
index 000..215861c
--- /dev/null
+++ b/tests/btrfs/054
@@ -0,0 +1,109 @@
+#! /bin/bash
+# FS QA Test No. btrfs/054
+#
+# Regression test for a btrfs incremental send issue where the difference
+# between the snapshots used by the incremental send consists of one of
+# these cases:
+#
+# 1) First snapshot has a directory with name X and in the second snapshot
+#that directory doesn't exist anymore but a subvolume/snapshot with
+#the same name (X) exists;
+#
+# 2) First snapshot has a subvolume/snapshot with name X and in the second
+#snapshot that subvolume/snapshot doesn't exist anymore (might have been
+#replaced by a directory with the same name or not).
+#
+# This issue is fixed by the following linux kernel btrfs patches:
+#
+#Btrfs: send, don't error in the presence of subvols/snapshots
+#Btrfs: set dead flag on the right root when destroying snapshot
+#
+#---
+# Copyright (c) 2014 Filipe Manana.  All Rights Reserved.
+#
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License as
+# published by the Free Software Foundation.
+#
+# This program is distributed in the hope that it would be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write the Free Software Foundation,
+# Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+#---
+#
+
+seq=`basename $0`
+seqres=$RESULT_DIR/$seq
+echo QA output created by $seq
+
+tmp=/tmp/$$
+status=1   # failure is the default!
+trap _cleanup; exit \$status 0 1 2 3 15
+
+_cleanup()
+{
+rm -fr $send_files_dir
+rm -fr $tmp
+}
+
+# get standard environment, filters and checks
+. ./common/rc
+. ./common/filter
+. ./common/attr
+
+# real QA test starts here
+_supported_fs btrfs
+_supported_os Linux
+_require_scratch
+_need_to_be_root
+
+send_files_dir=$TEST_DIR/btrfs-test-$seq
+
+rm -f $seqres.full
+rm -fr $send_files_dir
+mkdir $send_files_dir
+
+_scratch_mkfs /dev/null 21
+_scratch_mount
+
+mkdir $SCRATCH_MNT/testdir
+_run_btrfs_util_prog subvolume create $SCRATCH_MNT/first_subvol
+
+_run_btrfs_util_prog subvolume snapshot -r $SCRATCH_MNT $SCRATCH_MNT/mysnap1
+
+# Replace the directory testdir with a subvolume that has the same name.
+rmdir $SCRATCH_MNT/testdir
+_run_btrfs_util_prog subvolume create $SCRATCH_MNT/testdir
+
+# Delete the subvolume first_subvol and create a directory with the same name.
+_run_btrfs_util_prog subvolume delete $SCRATCH_MNT/first_subvol
+mkdir $SCRATCH_MNT/first_subvol
+
+_run_btrfs_util_prog subvolume snapshot -r $SCRATCH_MNT $SCRATCH_MNT/mysnap2
+
+_run_btrfs_util_prog send $SCRATCH_MNT/mysnap1 -f $send_files_dir/1.snap
+_run_btrfs_util_prog send $SCRATCH_MNT/mysnap2 -p $SCRATCH_MNT/mysnap1 \
+   -f $send_files_dir/2.snap
+
+_scratch_unmount
+_check_scratch_fs
+
+_scratch_mkfs /dev/null 21
+_scratch_mount
+
+_run_btrfs_util_prog receive $SCRATCH_MNT -f $send_files_dir/1.snap
+[ -e $SCRATCH_MNT/first_subvol ]  \
+   echo Subvolume first_subvol was not supposed to be replicated by full 
send!
+
+_run_btrfs_util_prog receive $SCRATCH_MNT -f $send_files_dir/2.snap
+[ -e $SCRATCH_MNT/testdir ]  \
+   echo Directory testdir was supposed to be deleted after incremental 
send!
+
+_check_scratch_fs
+
+status=0
+exit
diff --git a/tests/btrfs/054.out b/tests/btrfs/054.out
new file mode 100644
index 000..03e258b
--- /dev/null
+++ b/tests/btrfs/054.out
@@ -0,0 +1 @@
+QA output created by 054
diff --git a/tests/btrfs/group b/tests/btrfs/group
index

[PATCH v2] Btrfs: send, don't error in the presence of subvols/snapshots

2014-05-24 Thread Filipe David Borba Manana
If we are doing an incremental send and the base snapshot has a
directory with name X that doesn't exist anymore in the second
snapshot and a new subvolume/snapshot exists in the second snapshot
that has the same name as the directory (name X), the incremental
send would fail with an -ENOENT error. This is because it attempts
to look up an inode with a number matching the objectid of a
root, which doesn't exist.

Steps to reproduce:

mkfs.btrfs -f /dev/sdd
mount /dev/sdd /mnt

mkdir /mnt/testdir
btrfs subvolume snapshot -r /mnt /mnt/mysnap1

rmdir /mnt/testdir
btrfs subvolume create /mnt/testdir
btrfs subvolume snapshot -r /mnt /mnt/mysnap2

btrfs send -p /mnt/mysnap1 /mnt/mysnap2 -f /tmp/send.data

A test case for xfstests follows.

Reported-by: Robert White rwh...@pobox.com
Signed-off-by: Filipe David Borba Manana fdman...@gmail.com
---

V2: Simpler version.

 fs/btrfs/send.c | 4 
 1 file changed, 4 insertions(+)

diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c
index 1a65a40..2722b26 100644
--- a/fs/btrfs/send.c
+++ b/fs/btrfs/send.c
@@ -1664,6 +1664,10 @@ static int lookup_dir_item_inode(struct btrfs_root *root,
goto out;
}
btrfs_dir_item_key_to_cpu(path-nodes[0], di, key);
+   if (key.type == BTRFS_ROOT_ITEM_KEY) {
+   ret = -ENOENT;
+   goto out;
+   }
*found_inode = key.objectid;
*found_type = btrfs_dir_type(path-nodes[0], di);
 
-- 
1.9.1

--
To unsubscribe from this list: send the line unsubscribe linux-btrfs in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 2/2 v2] xfstests: add test for btrfs ioctl clone operation

2014-05-23 Thread Filipe David Borba Manana
This is a test to verify that the btrfs ioctl clone operation is
able to clone extents of a file to different positions of the file,
that is, the source and target files are the same. Existing tests
only cover the case where the source and target files are different.

Signed-off-by: Filipe David Borba Manana fdman...@gmail.com
---

V2: Made the test exercise a more complex code path in the btrfs ioctl clone
code. Now we have extents with different sizes and make the cloner process
partial extents and split existing extents with smaller ones.

 tests/btrfs/052 | 116 
 tests/btrfs/052.out |  51 +++
 tests/btrfs/group   |   1 +
 3 files changed, 168 insertions(+)
 create mode 100755 tests/btrfs/052
 create mode 100644 tests/btrfs/052.out

diff --git a/tests/btrfs/052 b/tests/btrfs/052
new file mode 100755
index 000..9b98521
--- /dev/null
+++ b/tests/btrfs/052
@@ -0,0 +1,116 @@
+#! /bin/bash
+# FS QA Test No. btrfs/052
+#
+# Verify that the btrfs ioctl clone operation can operate on the same
+# file as a source and target. That is, clone extents within the same
+# file.
+#
+#---
+# Copyright (c) 2014 Filipe Manana.  All Rights Reserved.
+#
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License as
+# published by the Free Software Foundation.
+#
+# This program is distributed in the hope that it would be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write the Free Software Foundation,
+# Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+#---
+#
+
+seq=`basename $0`
+seqres=$RESULT_DIR/$seq
+echo QA output created by $seq
+
+tmp=/tmp/$$
+status=1   # failure is the default!
+trap _cleanup; exit \$status 0 1 2 3 15
+
+_cleanup()
+{
+rm -fr $tmp
+}
+
+# get standard environment, filters and checks
+. ./common/rc
+. ./common/filter
+
+# real QA test starts here
+_supported_fs btrfs
+_supported_os Linux
+_require_scratch
+_require_btrfs_cloner
+_need_to_be_root
+
+rm -f $seqres.full
+
+_scratch_mkfs /dev/null 21
+_scratch_mount
+
+# Create a file with 5 extents, 4 of 8Kb each and 1 of 64Kb.
+$XFS_IO_PROG -f -c pwrite -S 0x01 -b 8192 0 8192 $SCRATCH_MNT/foo \
+   | _filter_xfs_io
+sync
+$XFS_IO_PROG -c pwrite -S 0x02 -b 8192 8192 8192 $SCRATCH_MNT/foo \
+   | _filter_xfs_io
+sync
+$XFS_IO_PROG -c pwrite -S 0x03 -b 8192 16384 8192 $SCRATCH_MNT/foo \
+   | _filter_xfs_io
+sync
+$XFS_IO_PROG -c pwrite -S 0x04 -b 8192 24576 8192 $SCRATCH_MNT/foo \
+   | _filter_xfs_io
+sync
+$XFS_IO_PROG -c pwrite -S 0x05 -b 65536 32768 65536 $SCRATCH_MNT/foo \
+   | _filter_xfs_io
+sync
+
+# Digest of initial content.
+md5sum $SCRATCH_MNT/foo | _filter_scratch
+
+# Same source and target ranges - must fail.
+$CLONER_PROG -s 8192 -d 8192 -l 8192 $SCRATCH_MNT/foo $SCRATCH_MNT/foo
+# Check file content didn't change.
+md5sum $SCRATCH_MNT/foo | _filter_scratch
+
+# Intersection between source and target ranges - must fail too.
+$CLONER_PROG -s 4096 -d 8192 -l 8192 $SCRATCH_MNT/foo $SCRATCH_MNT/foo
+# Check file content didn't change.
+md5sum $SCRATCH_MNT/foo | _filter_scratch
+
+# Clone an entire extent from a higher range to a lower range.
+$CLONER_PROG -s 24576 -d 0 -l 8192 $SCRATCH_MNT/foo $SCRATCH_MNT/foo
+
+# Check entire file, the 8Kb block at offset 0 now has the same content as the
+# 8Kb block at offset 24576.
+od -t x1 $SCRATCH_MNT/foo
+
+# Clone an entire extent from a lower range to a higher range.
+$CLONER_PROG -s 8192 -d 16384 -l 8192 $SCRATCH_MNT/foo $SCRATCH_MNT/foo
+
+# Check entire file, the 8Kb block at offset 0 now has the same content as the
+# 8Kb block at offset 24576, and the 8Kb block at offset 16384 now has the same
+# content as the 8Kb block at offset 8192.
+od -t x1 $SCRATCH_MNT/foo
+
+# Now clone 1 extent and an half into the file range starting at offset 65536.
+# So we get the second half of the extent at offset 16384 and the whole extent
+# at 24576 cloned into the middle of the 64Kb extent that starts at file offset
+# 32768. This makes the clone ioctl process more extent items from the b+tree
+# and forces a split of the large 64Kb extent at the end of the file.
+$CLONER_PROG -s 20480 -d 65536 -l 12288 $SCRATCH_MNT/foo $SCRATCH_MNT/foo
+
+# Check entire file. Besides the previous changes, we now should have 4096 
bytes
+# with the value 0x02 at file offset 65536, and 8192 bytes with value 0x04 at
+# the file offset 69632. The ranges [32768, 65536[ and [77824, 98304[ should
+# remain with all bytes having a value of 0x05.
+od -t

[PATCH] Btrfs-progs: debug-tree, add option to dump a single tree

2014-05-23 Thread Filipe David Borba Manana
Very often while debugging filesystems with many subvolumes and/or
snapshots, especially when they are large, I want to see only the
content of one of the trees. So this change just adds an option
to btrfs-debug-tree that allows specifying the id of the tree we're
interested in dumping to stdout.

Example:  btrfs-debug-tree -t 257 /dev/sdc

Will only dump the tree of the first snapshot or subvolume that was
created.

Signed-off-by: Filipe David Borba Manana fdman...@gmail.com
---
 btrfs-debug-tree.c | 12 ++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/btrfs-debug-tree.c b/btrfs-debug-tree.c
index cb6c106..36e1115 100644
--- a/btrfs-debug-tree.c
+++ b/btrfs-debug-tree.c
@@ -41,6 +41,8 @@ static int print_usage(void)
fprintf(stderr, \t-u : print info of uuid tree only\n);
fprintf(stderr, \t-b block_num : print info of the specified block
  only\n);
+   fprintf(stderr,
+   \t-t tree_id : print only the tree with the given id\n);
fprintf(stderr, %s\n, BTRFS_BUILD_VERSION);
exit(1);
 }
@@ -136,12 +138,13 @@ int main(int ac, char **av)
int root_backups = 0;
u64 block_only = 0;
struct btrfs_root *tree_root_scan;
+   u64 tree_id = 0;
 
radix_tree_init();
 
while(1) {
int c;
-   c = getopt(ac, av, deb:rRu);
+   c = getopt(ac, av, deb:rRut:);
if (c  0)
break;
switch(c) {
@@ -164,6 +167,9 @@ int main(int ac, char **av)
case 'b':
block_only = arg_strtou64(optarg);
break;
+   case 't':
+   tree_id = arg_strtou64(optarg);
+   break;
default:
print_usage();
}
@@ -208,7 +214,7 @@ int main(int ac, char **av)
goto close_root;
}
 
-   if (!(extent_only || uuid_tree_only)) {
+   if (!(extent_only || uuid_tree_only || tree_id)) {
if (roots_only) {
printf(root tree: %llu level %d\n,
 (unsigned long long)info-tree_root-node-start,
@@ -268,6 +274,8 @@ again:
  0);
if (!extent_buffer_uptodate(buf))
goto next;
+   if (tree_id  found_key.objectid != tree_id)
+   goto next;
 
switch(found_key.objectid) {
case BTRFS_ROOT_TREE_OBJECTID:
-- 
1.9.1

--
To unsubscribe from this list: send the line unsubscribe linux-btrfs in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH] Btrfs-progs: receive, allow to continue after errors happen

2014-05-23 Thread Filipe David Borba Manana
Due to either bugs in send (kernel) that generate a command against
a wrong path for example, or transient errors on the receiving side,
we stopped processing the send stream immediately and exited with
an error.

It's often desirable to continue processing the send stream even if an
error happens while processing a single command from the send stream.

This change just adds a --max-errors N parameter, whose default value
is 1 (preserving current behaviour), that makes receive stop only once N
errors have happened. A value of 0 means to never stop no matter how many
errors we run into while processing the send stream. Regardless of its
value, errors are always printed to stderr when they happen, just like
before this change.

Signed-off-by: Filipe David Borba Manana fdman...@gmail.com
---
 Documentation/btrfs-receive.txt |  3 +++
 cmds-receive.c  | 24 +++-
 send-stream.c   | 22 ++
 send-stream.h   |  3 ++-
 4 files changed, 42 insertions(+), 10 deletions(-)

diff --git a/Documentation/btrfs-receive.txt b/Documentation/btrfs-receive.txt
index a67bc66..a87c086 100644
--- a/Documentation/btrfs-receive.txt
+++ b/Documentation/btrfs-receive.txt
@@ -38,6 +38,9 @@ Use this option to specify a file to use instead.
 Terminate after receiving an end cmd in the data stream.
 Without this option, the receiver terminates only if an error is recognized
 or on EOF.
+--max-errors N::
+Terminate as soon as N errors happened while processing commands from the send
+stream. Default value is 1. A value of 0 means no limit.
 
 EXIT STATUS
 ---
diff --git a/cmds-receive.c b/cmds-receive.c
index 13db4c9..1aa4e52 100644
--- a/cmds-receive.c
+++ b/cmds-receive.c
@@ -33,6 +33,7 @@
 #include wait.h
 #include assert.h
 #include time.h
+#include getopt.h
 
 #include sys/stat.h
 #include sys/types.h
@@ -954,7 +955,8 @@ static struct btrfs_send_ops send_ops = {
.fallocate = process_fallocate,
 };
 
-static int do_receive(struct btrfs_receive *r, const char *tomnt, int r_fd)
+static int do_receive(struct btrfs_receive *r, const char *tomnt, int r_fd,
+ u64 max_errors)
 {
int ret;
char *dest_dir_full_path;
@@ -1006,7 +1008,8 @@ static int do_receive(struct btrfs_receive *r, const char 
*tomnt, int r_fd)
 
while (!end) {
ret = btrfs_read_and_process_send_stream(r_fd, send_ops, r,
-r-honor_end_cmd);
+r-honor_end_cmd,
+max_errors);
if (ret  0)
goto out;
if (ret)
@@ -1049,6 +1052,11 @@ out:
return ret;
 }
 
+static const struct option long_opts[] = {
+   { max-errors, 1, NULL, 'E' },
+   { NULL, 0, NULL, 0 }
+};
+
 int cmd_receive(int argc, char **argv)
 {
int c;
@@ -1056,7 +1064,7 @@ int cmd_receive(int argc, char **argv)
char *fromfile = NULL;
struct btrfs_receive r;
int receive_fd = fileno(stdin);
-
+   u64 max_errors = 1;
int ret;
 
memset(r, 0, sizeof(r));
@@ -1064,7 +1072,7 @@ int cmd_receive(int argc, char **argv)
r.write_fd = -1;
r.dest_dir_fd = -1;
 
-   while ((c = getopt(argc, argv, evf:)) != -1) {
+   while ((c = getopt_long(argc, argv, evf:, long_opts, NULL)) != -1) {
switch (c) {
case 'v':
g_verbose++;
@@ -1075,6 +1083,9 @@ int cmd_receive(int argc, char **argv)
case 'e':
r.honor_end_cmd = 1;
break;
+   case 'E':
+   max_errors = arg_strtou64(optarg);
+   break;
case '?':
default:
fprintf(stderr, ERROR: receive args invalid.\n);
@@ -1095,7 +1106,7 @@ int cmd_receive(int argc, char **argv)
}
}
 
-   ret = do_receive(r, tomnt, receive_fd);
+   ret = do_receive(r, tomnt, receive_fd, max_errors);
 
return !!ret;
 }
@@ -1121,5 +1132,8 @@ const char * const cmd_receive_usage[] = {
 in the data stream. Without this option,,
 the receiver terminates only if an error,
 is recognized or on EOF.,
+   --max-errors N Terminate as soon as N errors happened while,
+processing commands from the send stream.,
+Default value is 1. A value of 0 means no limit.,
NULL
 };
diff --git a/send-stream.c b/send-stream.c
index 812639f..a9acdf5 100644
--- a/send-stream.c
+++ b/send-stream.c
@@ -452,13 +452,21 @@ out:
return ret;
 }
 
+/*
+ * If max_errors is 0, then don't stop processing the stream if one of the
+ * callbacks in btrfs_send_ops structure returns an error. If greater than

[PATCH] xfstests: add test for btrfs send with large xattrs

2014-05-23 Thread Filipe David Borba Manana
Verify that btrfs send is able to replicate xattrs larger than
PATH_MAX. This is possible if the b+tree leaf size is larger
than 4Kb (mkfs.btrfs's default is max(16Kb, PAGE_SIZE) as of
btrfs-progs v3.12, and max(4Kb, PAGE_SIZE) in older versions).

This issue is fixed by the following linux kernel btrfs patch:

   Btrfs: send, use the right limits for xattr names and values

Signed-off-by: Filipe David Borba Manana fdman...@gmail.com
---
 tests/btrfs/053 | 108 
 tests/btrfs/053.out |   1 +
 tests/btrfs/group   |   1 +
 3 files changed, 110 insertions(+)
 create mode 100755 tests/btrfs/053
 create mode 100644 tests/btrfs/053.out

diff --git a/tests/btrfs/053 b/tests/btrfs/053
new file mode 100755
index 000..4dbdf59
--- /dev/null
+++ b/tests/btrfs/053
@@ -0,0 +1,108 @@
+#! /bin/bash
+# FS QA Test No. btrfs/053
+#
+# Verify that btrfs send is able to replicate xattrs larger than PATH_MAX.
+# This is possible if the b+tree leaf size is larger than 4Kb (mkfs.btrfs's
+# default is max(16Kb, PAGE_SIZE) as of btrfs-progs v3.12, and max(4Kb,
+# PAGE_SIZE in older versions).
+#
+# This issue is fixed by the following linux kernel btrfs patch:
+#
+#   Btrfs: send, use the right limits for xattr names and values
+#
+#---
+# Copyright (c) 2014 Filipe Manana.  All Rights Reserved.
+#
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License as
+# published by the Free Software Foundation.
+#
+# This program is distributed in the hope that it would be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write the Free Software Foundation,
+# Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+#---
+#
+
+seq=`basename $0`
+seqres=$RESULT_DIR/$seq
+echo QA output created by $seq
+
+tmp=/tmp/$$
+status=1   # failure is the default!
+trap _cleanup; exit \$status 0 1 2 3 15
+
+_cleanup()
+{
+rm -fr $send_files_dir
+rm -fr $tmp
+}
+
+# get standard environment, filters and checks
+. ./common/rc
+. ./common/filter
+. ./common/attr
+
+# real QA test starts here
+_supported_fs btrfs
+_supported_os Linux
+_require_scratch
+_require_fssum
+_require_attrs
+_need_to_be_root
+
+# max(16384, PAGE_SIZE) is the default leaf/node size on btrfs-progs v3.12+.
+# Older versions just use max(4096, PAGE_SIZE).
+# mkfs.btrfs can't create an fs with a leaf/node size smaller than PAGE_SIZE.
+leaf_size=$(echo -e 16384\n`getconf PAGE_SIZE` | sort -nr | head -1)
+
+send_files_dir=$TEST_DIR/btrfs-test-$seq
+
+rm -f $seqres.full
+rm -fr $send_files_dir
+mkdir $send_files_dir
+
+_scratch_mkfs -l $leaf_size /dev/null 21
+_scratch_mount
+
+echo hello world  $SCRATCH_MNT/foobar
+
+$SETFATTR_PROG -n user.xattr_name_1 -v `$PERL_PROG -e 'print A x 6000;'` \
+   $SCRATCH_MNT/foobar
+
+_run_btrfs_util_prog subvolume snapshot -r $SCRATCH_MNT $SCRATCH_MNT/mysnap1
+run_check $FSSUM_PROG -A -f -w $send_files_dir/1.fssum $SCRATCH_MNT/mysnap1
+
+# Update existing xattr value and add a new xattr too.
+$SETFATTR_PROG -n user.xattr_name_1 -v `$PERL_PROG -e 'print Z x ;'` \
+   $SCRATCH_MNT/foobar
+$SETFATTR_PROG -n user.xattr_name_2 -v `$PERL_PROG -e 'print U x ;'` \
+   $SCRATCH_MNT/foobar
+
+_run_btrfs_util_prog subvolume snapshot -r $SCRATCH_MNT $SCRATCH_MNT/mysnap2
+run_check $FSSUM_PROG -A -f -w $send_files_dir/2.fssum \
+   -x $SCRATCH_MNT/mysnap2/mysnap1 $SCRATCH_MNT/mysnap2
+
+_run_btrfs_util_prog send $SCRATCH_MNT/mysnap1 -f $send_files_dir/1.snap
+_run_btrfs_util_prog send $SCRATCH_MNT/mysnap2 -f $send_files_dir/2.snap
+
+_scratch_unmount
+_check_scratch_fs
+
+_scratch_mkfs -l $leaf_size /dev/null 21
+_scratch_mount
+
+_run_btrfs_util_prog receive $SCRATCH_MNT -f $send_files_dir/1.snap
+run_check $FSSUM_PROG -r $send_files_dir/1.fssum $SCRATCH_MNT/mysnap1
+
+_run_btrfs_util_prog receive $SCRATCH_MNT -f $send_files_dir/2.snap
+run_check $FSSUM_PROG -r $send_files_dir/2.fssum $SCRATCH_MNT/mysnap2
+
+_check_scratch_fs
+
+status=0
+exit
diff --git a/tests/btrfs/053.out b/tests/btrfs/053.out
new file mode 100644
index 000..4c61638
--- /dev/null
+++ b/tests/btrfs/053.out
@@ -0,0 +1 @@
+QA output created by 053
diff --git a/tests/btrfs/group b/tests/btrfs/group
index 5ff9b8e..ea49c5c 100644
--- a/tests/btrfs/group
+++ b/tests/btrfs/group
@@ -55,3 +55,4 @@
 050 auto
 051 auto quick
 052 auto quick
+053 auto quick
-- 
1.9.1

--
To unsubscribe from this list: send the line unsubscribe linux-btrfs in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH] Btrfs: send, use the right limits for xattr names and values

2014-05-23 Thread Filipe David Borba Manana
We were limiting the sum of the xattr name and value lengths to PATH_MAX,
which is not correct, especially on filesystems created with btrfs-progs
v3.12 or higher, where the default leaf size is max(16384, PAGE_SIZE), or
systems with page sizes larger than 4096 bytes.

Xattrs have their own specific maximum name and value lengths, which depend
on the leaf size, therefore use these limits to be able to send xattrs with
sizes larger than PATH_MAX.

A test case for xfstests follows.

Signed-off-by: Filipe David Borba Manana fdman...@gmail.com
---
 fs/btrfs/send.c | 30 +++---
 1 file changed, 23 insertions(+), 7 deletions(-)

diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c
index 70c5e8c..1a65a40 100644
--- a/fs/btrfs/send.c
+++ b/fs/btrfs/send.c
@@ -995,7 +995,7 @@ static int iterate_dir_item(struct btrfs_root *root, struct 
btrfs_path *path,
struct btrfs_dir_item *di;
struct btrfs_key di_key;
char *buf = NULL;
-   const int buf_len = PATH_MAX;
+   int buf_len;
u32 name_len;
u32 data_len;
u32 cur;
@@ -1005,6 +1005,11 @@ static int iterate_dir_item(struct btrfs_root *root, 
struct btrfs_path *path,
int num;
u8 type;
 
+   if (found_key-type == BTRFS_XATTR_ITEM_KEY)
+   buf_len = BTRFS_MAX_XATTR_SIZE(root);
+   else
+   buf_len = PATH_MAX;
+
buf = kmalloc(buf_len, GFP_NOFS);
if (!buf) {
ret = -ENOMEM;
@@ -1026,12 +1031,23 @@ static int iterate_dir_item(struct btrfs_root *root, 
struct btrfs_path *path,
type = btrfs_dir_type(eb, di);
btrfs_dir_item_key_to_cpu(eb, di, di_key);
 
-   /*
-* Path too long
-*/
-   if (name_len + data_len  buf_len) {
-   ret = -ENAMETOOLONG;
-   goto out;
+   if (type == BTRFS_FT_XATTR) {
+   if (name_len  XATTR_NAME_MAX) {
+   ret = -ENAMETOOLONG;
+   goto out;
+   }
+   if (name_len + data_len  buf_len) {
+   ret = -E2BIG;
+   goto out;
+   }
+   } else {
+   /*
+* Path too long
+*/
+   if (name_len + data_len  buf_len) {
+   ret = -ENAMETOOLONG;
+   goto out;
+   }
}
 
read_extent_buffer(eb, buf, (unsigned long)(di + 1),
-- 
1.9.1

--
To unsubscribe from this list: send the line unsubscribe linux-btrfs in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 1/2] xfstests: add helper require function _require_btrfs_cloner

2014-05-22 Thread Filipe David Borba Manana
So that the same check (btrfs cloner program presence) can be reused
by other tests.

Signed-off-by: Filipe David Borba Manana fdman...@gmail.com
---
 common/rc   | 7 +++
 tests/btrfs/035 | 4 +---
 2 files changed, 8 insertions(+), 3 deletions(-)

diff --git a/common/rc b/common/rc
index d1788d1..f27ee53 100644
--- a/common/rc
+++ b/common/rc
@@ -2085,6 +2085,13 @@ _require_fssum()
[ -x $FSSUM_PROG ] || _notrun fssum not built
 }
 
+_require_btrfs_cloner()
+{
+   CLONER_PROG=$here/src/cloner
+   [ -x $CLONER_PROG ] || \
+   _notrun cloner binary not present at $CLONER_PROG
+}
+
 # Given 2 files, verify that they have the same mapping but different
 # inodes - i.e. an undisturbed reflink
 # Silent if so, make noise if not
diff --git a/tests/btrfs/035 b/tests/btrfs/035
index 6808179..dd303af 100755
--- a/tests/btrfs/035
+++ b/tests/btrfs/035
@@ -45,13 +45,11 @@ trap _cleanup ; exit \$status 0 1 2 3 15
 _supported_fs btrfs
 _supported_os Linux
 _require_scratch
+_require_btrfs_cloner
 
 _scratch_mkfs  /dev/null 21
 _scratch_mount
 
-CLONER_PROG=$here/src/cloner
-[ -x $CLONER_PROG ] || _notrun cloner binary not present at $CLONER_PROG
-
 src_str=aa
 
 echo -n $src_str  $SCRATCH_MNT/src
-- 
1.9.1

--
To unsubscribe from this list: send the line unsubscribe linux-btrfs in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v3] Btrfs: ensure readers see new data after a clone operation

2014-05-22 Thread Filipe David Borba Manana
;
}
fd2 = open(DST_FILE, O_RDWR);
if (fd2  0) {
fprintf(stderr, Error open dst file: %s\n, strerror(errno));
return 1;
}
clone_args.src_fd = fd1;
clone_args.src_offset = 0;
clone_args.src_length = 4096;
clone_args.dest_offset = 0;
ret = ioctl(fd2, BTRFS_IOC_CLONE_RANGE, clone_args);
assert(ret == 0);
close(fd1);
close(fd2);

pthread_mutex_lock(mutex);
clone_done = 1;
pthread_mutex_unlock(mutex);
ret = pthread_join(reader, NULL);
assert(ret == 0);

pthread_mutex_lock(mutex);
ret = stale_data ? 1 : 0;
pthread_mutex_unlock(mutex);
return ret;
}

Signed-off-by: Filipe David Borba Manana fdman...@gmail.com
---

V2: Protect against ongoing writes by locking the target range in
the io tree and wait for any existing ordered extents for that
range to finish before starting the clone operation.

V3: Fixed the locking ranges for the case where the source and target
inodes are the same. I was passing an end offset to lock_extent_range
when that function expects a range length instead. This resulted in
incorrect unlocking, leaving some extent states locked forever. This
is now tested with a new test case for xfstests.

 fs/btrfs/ioctl.c | 36 +++-
 1 file changed, 31 insertions(+), 5 deletions(-)

diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index fba7a00..362720a 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -3410,15 +3410,41 @@ static noinline long btrfs_ioctl_clone(struct file 
*file, unsigned long srcfd,
goto out_unlock;
}
 
-   /* truncate page cache pages from target inode range */
-   truncate_inode_pages_range(inode-i_data, destoff,
-  PAGE_CACHE_ALIGN(destoff + len) - 1);
+   /*
+* Lock the target range too. Right after we replace the file extent
+* items in the fs tree (which now point to the cloned data), we might
+* have a worker replace them with extent items relative to a write
+* operation that was issued before this clone operation (i.e. confront
+* with inode.c:btrfs_finish_ordered_io).
+*/
+   if (same_inode) {
+   u64 lock_start = min_t(u64, off, destoff);
+   u64 lock_len = max_t(u64, off, destoff) + len - lock_start;
 
-   lock_extent_range(src, off, len);
+   lock_extent_range(src, lock_start, lock_len);
+   } else {
+   lock_extent_range(src, off, len);
+   lock_extent_range(inode, destoff, len);
+   }
 
ret = btrfs_clone(src, inode, off, olen, len, destoff);
 
-   unlock_extent(BTRFS_I(src)-io_tree, off, off + len - 1);
+   if (same_inode) {
+   u64 lock_start = min_t(u64, off, destoff);
+   u64 lock_end = max_t(u64, off, destoff) + len - 1;
+
+   unlock_extent(BTRFS_I(src)-io_tree, lock_start, lock_end);
+   } else {
+   unlock_extent(BTRFS_I(src)-io_tree, off, off + len - 1);
+   unlock_extent(BTRFS_I(inode)-io_tree, destoff,
+ destoff + len - 1);
+   }
+   /*
+* Truncate page cache pages so that future reads will see the cloned
+* data immediately and not the previous data.
+*/
+   truncate_inode_pages_range(inode-i_data, destoff,
+  PAGE_CACHE_ALIGN(destoff + len) - 1);
 out_unlock:
if (!same_inode) {
if (inode  src) {
-- 
1.9.1

--
To unsubscribe from this list: send the line unsubscribe linux-btrfs in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 2/2] xfstests: add test for btrfs ioctl clone operation

2014-05-22 Thread Filipe David Borba Manana
This is a test to verify that the btrfs ioctl clone operation is
able to clone extents of a file to different positions of the file,
that is, the source and target files are the same. Existing tests
only cover the case where the source and target files are different.

Signed-off-by: Filipe David Borba Manana fdman...@gmail.com
---
 tests/btrfs/052 | 100 
 tests/btrfs/052.out |  30 
 tests/btrfs/group   |   1 +
 3 files changed, 131 insertions(+)
 create mode 100755 tests/btrfs/052
 create mode 100644 tests/btrfs/052.out

diff --git a/tests/btrfs/052 b/tests/btrfs/052
new file mode 100755
index 000..292eb50
--- /dev/null
+++ b/tests/btrfs/052
@@ -0,0 +1,100 @@
+#! /bin/bash
+# FS QA Test No. btrfs/052
+#
+# Verify that the btrfs ioctl clone operation can operate on the same
+# file as a source and target. That is, clone extents within the same
+# file.
+#
+#---
+# Copyright (c) 2014 Filipe Manana.  All Rights Reserved.
+#
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License as
+# published by the Free Software Foundation.
+#
+# This program is distributed in the hope that it would be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write the Free Software Foundation,
+# Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+#---
+#
+
+seq=`basename $0`
+seqres=$RESULT_DIR/$seq
+echo QA output created by $seq
+
+tmp=/tmp/$$
+status=1   # failure is the default!
+trap _cleanup; exit \$status 0 1 2 3 15
+
+_cleanup()
+{
+rm -fr $tmp
+}
+
+# get standard environment, filters and checks
+. ./common/rc
+. ./common/filter
+
+# real QA test starts here
+_supported_fs btrfs
+_supported_os Linux
+_require_scratch
+_require_btrfs_cloner
+_need_to_be_root
+
+rm -f $seqres.full
+
+_scratch_mkfs /dev/null 21
+_scratch_mount
+
+# Create a file with 4 extents of 8Kb each.
+$XFS_IO_PROG -f -c pwrite -S 0x01 -b 8192 0 8192 $SCRATCH_MNT/foo \
+   | _filter_xfs_io
+sync
+$XFS_IO_PROG -c pwrite -S 0x02 -b 8192 8192 8192 $SCRATCH_MNT/foo \
+   | _filter_xfs_io
+sync
+$XFS_IO_PROG -c pwrite -S 0x03 -b 8192 16384 8192 $SCRATCH_MNT/foo \
+   | _filter_xfs_io
+sync
+$XFS_IO_PROG -c pwrite -S 0x04 -b 8192 24576 8192 $SCRATCH_MNT/foo \
+   | _filter_xfs_io
+sync
+
+# Digest of initial content.
+md5sum $SCRATCH_MNT/foo | _filter_scratch
+
+# Same source and target ranges - must fail.
+$CLONER_PROG -s 8192 -d 8192 -l 8192 $SCRATCH_MNT/foo $SCRATCH_MNT/foo
+# Check file content didn't change.
+md5sum $SCRATCH_MNT/foo | _filter_scratch
+
+# Intersection between source and target ranges - must fail too.
+$CLONER_PROG -s 4096 -d 8192 -l 8192 $SCRATCH_MNT/foo $SCRATCH_MNT/foo
+# Check file content didn't change.
+md5sum $SCRATCH_MNT/foo | _filter_scratch
+
+# Clone from a higher range to a lower range.
+$CLONER_PROG -s 24576 -d 0 -l 8192 $SCRATCH_MNT/foo $SCRATCH_MNT/foo
+
+# Check entire file, the 8Kb block at offset 0 now has the same content as the
+# 8Kb block at offset 24576.
+od -t x1 $SCRATCH_MNT/foo
+
+# Clone from a lower range to a higher range.
+$CLONER_PROG -s 8192 -d 16384 -l 8192 $SCRATCH_MNT/foo $SCRATCH_MNT/foo
+
+# Check entire file, the 8Kb block at offset 0 now has the same content as the
+# 8Kb block at offset 24576, and the 8Kb block at offset 16384 now has the same
+# content as the 8Kb block at offset 8192.
+od -t x1 $SCRATCH_MNT/foo
+
+_check_scratch_fs
+
+status=0
+exit
diff --git a/tests/btrfs/052.out b/tests/btrfs/052.out
new file mode 100644
index 000..0073813
--- /dev/null
+++ b/tests/btrfs/052.out
@@ -0,0 +1,30 @@
+QA output created by 052
+wrote 8192/8192 bytes at offset 0
+XXX Bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+wrote 8192/8192 bytes at offset 8192
+XXX Bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+wrote 8192/8192 bytes at offset 16384
+XXX Bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+wrote 8192/8192 bytes at offset 24576
+XXX Bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+b328fe91ed791d96b3ca6830ef50475f  SCRATCH_MNT/foo
+clone failed: Invalid argument
+b328fe91ed791d96b3ca6830ef50475f  SCRATCH_MNT/foo
+clone failed: Invalid argument
+b328fe91ed791d96b3ca6830ef50475f  SCRATCH_MNT/foo
+000 04 04 04 04 04 04 04 04 04 04 04 04 04 04 04 04
+*
+002 02 02 02 02 02 02 02 02 02 02 02 02 02 02 02 02
+*
+004 03 03 03 03 03 03 03 03 03 03 03 03 03 03 03 03
+*
+006 04 04 04 04 04 04 04 04 04 04 04 04 04 04 04 04
+*
+010
+000 04 04 04 04 04 04 04 04 04 04 04 04 04 04 04 04
+*
+002 02

[PATCH] Btrfs: send, fix corrupted paths strings for long paths

2014-05-21 Thread Filipe David Borba Manana
If a path has more than 230 characters, we allocate a new buffer to
use for the path, but we were forgetting to copy the contents of the
previous buffer into the new one, which has random content from the
kmalloc call.

Test:

mkfs.btrfs -f /dev/sdd
mount /dev/sdd /mnt


TEST_PATH=/mnt/fdmanana/.config/google-chrome-mysetup/Default/Pepper_Data/Shockwave_Flash/WritableRoot/#SharedObjects/JSHJ4ZKN/s.wsj.net/[[IMPORT]]/players.edgesuite.net/flash/plugins/osmf/advanced-streaming-plugin/v2.7/osmf1.6/Ak#
mkdir -p $TEST_PATH
echo hello world  $TEST_PATH/amaiAdvancedStreamingPlugin.txt

btrfs subvolume snapshot -r /mnt /mnt/mysnap1
btrfs send /mnt/mysnap1 -f /tmp/1.snap

A test for xfstests follows.

Signed-off-by: Filipe David Borba Manana fdman...@gmail.com
Cc: Marc Merlin m...@merlins.org
---
 fs/btrfs/send.c | 7 +--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c
index f6bbc1e..70c5e8c 100644
--- a/fs/btrfs/send.c
+++ b/fs/btrfs/send.c
@@ -368,10 +368,13 @@ static int fs_path_ensure_buf(struct fs_path *p, int len)
/*
 * First time the inline_buf does not suffice
 */
-   if (p-buf == p-inline_buf)
+   if (p-buf == p-inline_buf) {
tmp_buf = kmalloc(len, GFP_NOFS);
-   else
+   if (tmp_buf)
+   memcpy(tmp_buf, p-buf, old_buf_len);
+   } else {
tmp_buf = krealloc(p-buf, len, GFP_NOFS);
+   }
if (!tmp_buf)
return -ENOMEM;
p-buf = tmp_buf;
-- 
1.9.1

--
To unsubscribe from this list: send the line unsubscribe linux-btrfs in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH] xfstests: add test for btrfs send with long paths

2014-05-21 Thread Filipe David Borba Manana
Regression test for btrfs send where long paths (exceeding 230 characters)
made send produce paths with random characters from a memory buffer returned
by kmalloc, as send forgot to populate the new buffer with the path string.

This issue is fixed by the following linux kernel btrfs patch:

   Btrfs: send, fix corrupted path strings for long paths

Signed-off-by: Filipe David Borba Manana fdman...@gmail.com
---
 tests/btrfs/051 | 85 +
 tests/btrfs/051.out |  1 +
 tests/btrfs/group   |  1 +
 3 files changed, 87 insertions(+)
 create mode 100755 tests/btrfs/051
 create mode 100644 tests/btrfs/051.out

diff --git a/tests/btrfs/051 b/tests/btrfs/051
new file mode 100755
index 000..53df664
--- /dev/null
+++ b/tests/btrfs/051
@@ -0,0 +1,85 @@
+#! /bin/bash
+# FS QA Test No. btrfs/051
+#
+# Regression test for btrfs send where long paths (exceeding 230 characters)
+# made send produce paths with random characters from a memory buffer returned
+# by kmalloc, as send forgot to populate the new buffer with the path string.
+#
+# This issue is fixed by the following linux kernel btrfs patch:
+#
+#   Btrfs: send, fix corrupted path strings for long paths
+#
+#---
+# Copyright (c) 2014 Filipe Manana.  All Rights Reserved.
+#
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License as
+# published by the Free Software Foundation.
+#
+# This program is distributed in the hope that it would be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write the Free Software Foundation,
+# Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+#---
+#
+
+seq=`basename $0`
+seqres=$RESULT_DIR/$seq
+echo QA output created by $seq
+
+tmp=/tmp/$$
+status=1   # failure is the default!
+trap _cleanup; exit \$status 0 1 2 3 15
+
+_cleanup()
+{
+rm -fr $send_files_dir
+rm -fr $tmp
+}
+
+# get standard environment, filters and checks
+. ./common/rc
+. ./common/filter
+
+# real QA test starts here
+_supported_fs btrfs
+_supported_os Linux
+_require_scratch
+_require_fssum
+_need_to_be_root
+
+send_files_dir=$TEST_DIR/btrfs-test-$seq
+
+rm -f $seqres.full
+rm -fr $send_files_dir
+mkdir $send_files_dir
+
+_scratch_mkfs /dev/null 21
+_scratch_mount
+
+TEST_PATH=$SCRATCH_MNT/fdmanana/.config/google-chrome-mysetup/Default/Pepper_Data/Shockwave_Flash/WritableRoot/#SharedObjects/JSHJ4ZKN/s.wsj.net/[[IMPORT]]/players.edgesuite.net/flash/plugins/osmf/advanced-streaming-plugin/v2.7/osmf1.6/Ak#
+
+mkdir -p $TEST_PATH
+echo hello world  $TEST_PATH/amaiAdvancedStreamingPlugin.txt
+
+_run_btrfs_util_prog subvolume snapshot -r $SCRATCH_MNT $SCRATCH_MNT/mysnap1
+run_check $FSSUM_PROG -A -f -w $send_files_dir/1.fssum $SCRATCH_MNT/mysnap1
+_run_btrfs_util_prog send $SCRATCH_MNT/mysnap1 -f $send_files_dir/1.snap
+
+_scratch_unmount
+_check_scratch_fs
+
+_scratch_mkfs /dev/null 21
+_scratch_mount
+
+_run_btrfs_util_prog receive $SCRATCH_MNT -f $send_files_dir/1.snap
+run_check $FSSUM_PROG -r $send_files_dir/1.fssum $SCRATCH_MNT/mysnap1
+
+_check_scratch_fs
+
+status=0
+exit
diff --git a/tests/btrfs/051.out b/tests/btrfs/051.out
new file mode 100644
index 000..636dcef
--- /dev/null
+++ b/tests/btrfs/051.out
@@ -0,0 +1 @@
+QA output created by 051
diff --git a/tests/btrfs/group b/tests/btrfs/group
index 69a80e0..0673449 100644
--- a/tests/btrfs/group
+++ b/tests/btrfs/group
@@ -53,3 +53,4 @@
 048 auto quick
 049 auto quick
 050 auto
+051 auto quick
-- 
1.9.1

--
To unsubscribe from this list: send the line unsubscribe linux-btrfs in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v2] Btrfs: send, fix corrupted path strings for long paths

2014-05-21 Thread Filipe David Borba Manana
If a path has more than 230 characters, we allocate a new buffer to
use for the path, but we were forgetting to copy the contents of the
previous buffer into the new one, which has random content from the
kmalloc call.

Test:

mkfs.btrfs -f /dev/sdd
mount /dev/sdd /mnt


TEST_PATH=/mnt/fdmanana/.config/google-chrome-mysetup/Default/Pepper_Data/Shockwave_Flash/WritableRoot/#SharedObjects/JSHJ4ZKN/s.wsj.net/[[IMPORT]]/players.edgesuite.net/flash/plugins/osmf/advanced-streaming-plugin/v2.7/osmf1.6/Ak#
mkdir -p $TEST_PATH
echo "hello world" > $TEST_PATH/amaiAdvancedStreamingPlugin.txt

btrfs subvolume snapshot -r /mnt /mnt/mysnap1
btrfs send /mnt/mysnap1 -f /tmp/1.snap

A test for xfstests follows.

Signed-off-by: Filipe David Borba Manana fdman...@gmail.com
Cc: Marc Merlin m...@merlins.org
---

V2: Fix change title, paths to path.

 fs/btrfs/send.c | 7 +--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c
index f6bbc1e..70c5e8c 100644
--- a/fs/btrfs/send.c
+++ b/fs/btrfs/send.c
@@ -368,10 +368,13 @@ static int fs_path_ensure_buf(struct fs_path *p, int len)
/*
 * First time the inline_buf does not suffice
 */
-   if (p-buf == p-inline_buf)
+   if (p-buf == p-inline_buf) {
tmp_buf = kmalloc(len, GFP_NOFS);
-   else
+   if (tmp_buf)
+   memcpy(tmp_buf, p-buf, old_buf_len);
+   } else {
tmp_buf = krealloc(p-buf, len, GFP_NOFS);
+   }
if (!tmp_buf)
return -ENOMEM;
p-buf = tmp_buf;
-- 
1.9.1

--
To unsubscribe from this list: send the line unsubscribe linux-btrfs in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v2] Btrfs: ensure readers see new data after a clone operation

2014-05-19 Thread Filipe David Borba Manana
;
}
fd2 = open(DST_FILE, O_RDWR);
if (fd2  0) {
fprintf(stderr, Error open dst file: %s\n, strerror(errno));
return 1;
}
clone_args.src_fd = fd1;
clone_args.src_offset = 0;
clone_args.src_length = 4096;
clone_args.dest_offset = 0;
ret = ioctl(fd2, BTRFS_IOC_CLONE_RANGE, clone_args);
assert(ret == 0);
close(fd1);
close(fd2);

pthread_mutex_lock(mutex);
clone_done = 1;
pthread_mutex_unlock(mutex);
ret = pthread_join(reader, NULL);
assert(ret == 0);

pthread_mutex_lock(mutex);
ret = stale_data ? 1 : 0;
pthread_mutex_unlock(mutex);
return ret;
}

Signed-off-by: Filipe David Borba Manana fdman...@gmail.com
---

V2: Protect against ongoing writes by locking the target range in
the io tree and wait for any existing ordered extents for that
range to finish before starting the clone operation.

 fs/btrfs/ioctl.c | 34 --
 1 file changed, 28 insertions(+), 6 deletions(-)

diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index fba7a00..12a60ef 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -3410,15 +3410,37 @@ static noinline long btrfs_ioctl_clone(struct file 
*file, unsigned long srcfd,
goto out_unlock;
}
 
-   /* truncate page cache pages from target inode range */
-   truncate_inode_pages_range(inode-i_data, destoff,
-  PAGE_CACHE_ALIGN(destoff + len) - 1);
-
-   lock_extent_range(src, off, len);
+   /*
+* Lock the target range too. Right after we replace the file extent
+* items in the fs tree (which now point to the cloned data), we might
+* have a worker replace them with extent items relative to a write
+* operation that was issued before this clone operation (i.e. confront
+* with inode.c:btrfs_finish_ordered_io).
+*/
+   if (same_inode) {
+   lock_extent_range(src, min_t(u64, off, destoff),
+ max_t(u64, off + len, destoff + len));
+   } else {
+   lock_extent_range(src, off, len);
+   lock_extent_range(inode, destoff, len);
+   }
 
ret = btrfs_clone(src, inode, off, olen, len, destoff);
 
-   unlock_extent(BTRFS_I(src)-io_tree, off, off + len - 1);
+   if (same_inode) {
+   unlock_extent(BTRFS_I(src)-io_tree, min_t(u64, off, destoff),
+ max_t(u64, off + len, destoff + len) - 1);
+   } else {
+   unlock_extent(BTRFS_I(src)-io_tree, off, off + len - 1);
+   unlock_extent(BTRFS_I(inode)-io_tree, destoff,
+ destoff + len - 1);
+   }
+   /*
+* Truncate page cache pages so that future reads will see the cloned
+* data immediately and not the previous data.
+*/
+   truncate_inode_pages_range(inode-i_data, destoff,
+  PAGE_CACHE_ALIGN(destoff + len) - 1);
 out_unlock:
if (!same_inode) {
if (inode  src) {
-- 
1.9.1

--
To unsubscribe from this list: send the line unsubscribe linux-btrfs in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH] Btrfs: fix leak of block group cache objects

2014-05-16 Thread Filipe David Borba Manana
The change titled:

 Btrfs: fix broken free space cache after the system crashed

can increment a block group cache object twice in find_free_extent() and
never decrement it twice, resulting in a memory leak.

This is easy to reproduce by having kmemleak enabled and the following
steps:

mkfs.btrfs -f /dev/sdd
mount /dev/sdd /mnt
umount /mnt
rmmod btrfs

cat /sys/kernel/debug/kmemleak
unreferenced object 0x8802089249d8 (size 512):
  comm mount, pid 6826, jiffies 430600 (age 3065.636s)
  hex dump (first 32 bytes):
00 00 c0 01 00 00 00 00 c0 00 00 00 40 00 00 00  @...
00 00 c0 01 00 00 00 00 00 00 01 00 00 00 00 00  
  backtrace:
[816ab3b6] kmemleak_alloc+0x26/0x50
[8119447d] kmem_cache_alloc_trace+0x11d/0x1e0
[a02a369c] btrfs_create_block_group_cache+0x3c/0x160 [btrfs]
[a02adf07] btrfs_read_block_groups+0x1d7/0x650 [btrfs]
[a02bc800] open_ctree+0x16a0/0x20c0 [btrfs]
[a0293321] btrfs_mount+0x6b1/0x980 [btrfs]
[811aa9c0] mount_fs+0x20/0xe0
[811c76f3] vfs_kern_mount+0x73/0x170
[811ca046] do_mount+0x206/0xb20
[811cac4e] SyS_mount+0x8e/0xe0
[816c8492] system_call_fastpath+0x16/0x1b
[] 0x
unreferenced object 0x8802019571d0 (size 128):
  comm mount, pid 6826, jiffies 430600 (age 3065.684s)
  hex dump (first 32 bytes):
4d 06 4d 06 ad 4e ad de ff ff ff ff 00 00 00 00  M.M..N..
ff ff ff ff ff ff ff ff 90 0d 36 a0 ff ff ff ff  ..6.
  backtrace:
[816ab3b6] kmemleak_alloc+0x26/0x50
[8119447d] kmem_cache_alloc_trace+0x11d/0x1e0
[a02a36be] btrfs_create_block_group_cache+0x5e/0x160 [btrfs]
[a02adf07] btrfs_read_block_groups+0x1d7/0x650 [btrfs]
[a02bc800] open_ctree+0x16a0/0x20c0 [btrfs]
[a0293321] btrfs_mount+0x6b1/0x980 [btrfs]
[811aa9c0] mount_fs+0x20/0xe0
[811c76f3] vfs_kern_mount+0x73/0x170
[811ca046] do_mount+0x206/0xb20
[811cac4e] SyS_mount+0x8e/0xe0
[816c8492] system_call_fastpath+0x16/0x1b
[] 0x

Signed-off-by: Filipe David Borba Manana fdman...@gmail.com
---

Note: this only affects Chris' integration branch.

 fs/btrfs/extent-tree.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index eb0760f..0bad610 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -6456,6 +6456,8 @@ static noinline int find_free_extent(struct btrfs_root 
*orig_root,
} else {
index = get_block_group_index(block_group);
btrfs_grab_block_group(block_group, delalloc);
+   /* compensate get by btrfs_grab_block_group() */
+   btrfs_put_block_group(block_group);
goto have_block_group;
}
} else if (block_group) {
-- 
1.9.1

--
To unsubscribe from this list: send the line unsubscribe linux-btrfs in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH] Btrfs: send, fix incorrect ref access when using extrefs

2014-05-13 Thread Filipe David Borba Manana
When running send, if an inode only has extended reference items
associated to it and no regular references, send.c:get_first_ref()
was incorrectly assuming the reference it found was of type
BTRFS_INODE_REF_KEY due to use of the wrong key variable.
This caused weird behaviour when using the found item as a regular
reference, such as weird path string, and occasionally (when lucky)
a crash:

[  190.600652] general protection fault:  [#1] SMP DEBUG_PAGEALLOC
[  190.600994] Modules linked in: btrfs xor raid6_pq binfmt_misc nfsd 
auth_rpcgss oid_registry nfs_acl nfs lockd fscache sunrpc psmouse serio_raw 
evbug pcspkr i2c_piix4 e1000 floppy
[  190.602565] CPU: 2 PID: 14520 Comm: btrfs Not tainted 
3.13.0-fdm-btrfs-next-26+ #1
[  190.602728] Hardware name: Bochs Bochs, BIOS Bochs 01/01/2011
[  190.602868] task: 8800d447c920 ti: 8801fa79e000 task.ti: 
8801fa79e000
[  190.603030] RIP: 0010:[813266b4]  [813266b4] 
memcpy+0x54/0x110
[  190.603262] RSP: 0018:8801fa79f880  EFLAGS: 00010202
[  190.603395] RAX: 8800d4326e3f RBX: 036a RCX: 8800
[  190.603553] RDX: 032a RSI: ffe708844042936a RDI: 8800d43271a9
[  190.603710] RBP: 8801fa79f8c8 R08: 003a4ef0 R09: 
[  190.603867] R10: 793a4ef09f00 R11: 9f53726f R12: 8800d43271a9
[  190.604020] R13: 1600 R14: 8802110134f0 R15: 036a
[  190.604020] FS:  7fb423d09b80() GS:88021620() 
knlGS:
[  190.604020] CS:  0010 DS:  ES:  CR0: 8005003b
[  190.604020] CR2: 7fb4229d4b78 CR3: 0001f5d76000 CR4: 06e0
[  190.604020] Stack:
[  190.604020]  a01f4d49 8801fa79f8f0 09f9 
8801fa79f8c8
[  190.604020]  09f9 880211013260 f971 
88021147dba8
[  190.604020]  09f9 8801fa79f918 a02367f5 
8801fa79f928
[  190.604020] Call Trace:
[  190.604020]  [a01f4d49] ? read_extent_buffer+0xb9/0x120 [btrfs]
[  190.604020]  [a02367f5] fs_path_add_from_extent_buffer+0x45/0x60 
[btrfs]
[  190.604020]  [a0238806] get_first_ref+0x1f6/0x210 [btrfs]
[  190.604020]  [a0238994] __get_cur_name_and_parent+0x174/0x3a0 
[btrfs]
[  190.604020]  [8118df3d] ? kmem_cache_alloc_trace+0x11d/0x1e0
[  190.604020]  [a0236674] ? fs_path_alloc+0x24/0x60 [btrfs]
[  190.604020]  [a0238c91] get_cur_path+0xd1/0x240 [btrfs]
(...)

Steps to reproduce (either crash or some weirdness like an odd path string):

mkfs.btrfs -f -O extref /dev/sdd
mount /dev/sdd /mnt

mkdir /mnt/testdir
touch /mnt/testdir/foobar

for i in `seq 1 2550`; do
ln /mnt/testdir/foobar /mnt/testdir/foobar_link_`printf %04d $i`
done

ln /mnt/testdir/foobar /mnt/testdir/final_foobar_name

rm -f /mnt/testdir/foobar
for i in `seq 1 2550`; do
rm -f /mnt/testdir/foobar_link_`printf %04d $i`
done

btrfs subvolume snapshot -r /mnt /mnt/mysnap
btrfs send /mnt/mysnap -f /tmp/mysnap.send

Signed-off-by: Filipe David Borba Manana fdman...@gmail.com
---
 fs/btrfs/send.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c
index 40f353f..0035bdd 100644
--- a/fs/btrfs/send.c
+++ b/fs/btrfs/send.c
@@ -1688,7 +1688,7 @@ static int get_first_ref(struct btrfs_root *root, u64 ino,
goto out;
}
 
-   if (key.type == BTRFS_INODE_REF_KEY) {
+   if (found_key.type == BTRFS_INODE_REF_KEY) {
struct btrfs_inode_ref *iref;
iref = btrfs_item_ptr(path-nodes[0], path-slots[0],
  struct btrfs_inode_ref);
-- 
1.9.1

--
To unsubscribe from this list: send the line unsubscribe linux-btrfs in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH] xfstests: btrfs, add regression test for send with extrefs

2014-05-13 Thread Filipe David Borba Manana
Regression for btrfs send when an inode only has extended references
associated to it (no regular references present). This used to cause
incorrect access to a b+tree leaf, where an extended reference item
was accessed as if it were a regular reference item, causing unexpected
and unpredictable behaviour such as producing a random/weird path string
or a crash.

This issue is fixed by the following linux kernel btrfs patch:

   Btrfs: send, fix incorrect ref access when using extrefs

Cc: Josef Bacik jba...@fb.com
Signed-off-by: Filipe David Borba Manana fdman...@gmail.com
---
 tests/btrfs/050 | 109 
 tests/btrfs/050.out |   1 +
 tests/btrfs/group   |   1 +
 3 files changed, 111 insertions(+)
 create mode 100755 tests/btrfs/050
 create mode 100644 tests/btrfs/050.out

diff --git a/tests/btrfs/050 b/tests/btrfs/050
new file mode 100755
index 000..6e4bd13
--- /dev/null
+++ b/tests/btrfs/050
@@ -0,0 +1,109 @@
+#! /bin/bash
+# FS QA Test No. btrfs/050
+#
+# Regression for btrfs send when an inode only has extended references
+# associated to it (no regular references present). This used to cause
+# incorrect access to a b+tree leaf, where an extended reference item
+# was accessed as if it were a regular reference item, causing unexpected
+# and unpredictable behaviour such as producing a random/weird path string
+# or a crash.
+#
+# This issue is fixed by the following linux kernel btrfs patch:
+#
+#   Btrfs: send, fix incorrect ref access when using extrefs
+#
+#---
+# Copyright (c) 2014 Filipe Manana.  All Rights Reserved.
+#
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License as
+# published by the Free Software Foundation.
+#
+# This program is distributed in the hope that it would be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write the Free Software Foundation,
+# Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+#---
+#
+
+seq=`basename $0`
+seqres=$RESULT_DIR/$seq
+echo QA output created by $seq
+
+tmp=/tmp/$$
+status=1   # failure is the default!
+trap _cleanup; exit \$status 0 1 2 3 15
+
+_cleanup()
+{
+rm -fr $send_files_dir
+rm -fr $tmp
+}
+
+# get standard environment, filters and checks
+. ./common/rc
+. ./common/filter
+
+# real QA test starts here
+_supported_fs btrfs
+_supported_os Linux
+_require_scratch
+_require_fssum
+_need_to_be_root
+
+send_files_dir=$TEST_DIR/btrfs-test-$seq
+
+rm -f $seqres.full
+rm -fr $send_files_dir
+mkdir $send_files_dir
+
+_scratch_mkfs -O extref /dev/null 21
+_scratch_mount
+
+# 2550 hard links is enough to cause creation of extended references
+# even if the leaf/node size is 64Kb (largest possible).
+NUM_LINKS=2550
+TEST_PATH=$SCRATCH_MNT/home/john/files/series/qwerty
+
+mkdir -p $TEST_PATH
+touch $TEST_PATH/foobar
+
+# Create a bunch of hard links for the file, such that at least one
+# inode extended reference item is created.
+for i in `seq 1 $NUM_LINKS`; do
+   ln $TEST_PATH/foobar $TEST_PATH/foobar_link_`printf %04d $i`
+done
+
+# The only link we'll have alive at the end.
+ln $TEST_PATH/foobar $TEST_PATH/final_foobar_name
+
+# Now delete all previous hard links (except the last one). This will
+# remove the regular inode reference item from the b+tree, and will
+# leave only an inode extended reference item, which is the condition
+# necessary to trigger the bug.
+rm -f $TEST_PATH/foobar
+for i in `seq 1 $NUM_LINKS`; do
+   rm -f $TEST_PATH/foobar_link_`printf %04d $i`
+done
+
+_run_btrfs_util_prog subvolume snapshot -r $SCRATCH_MNT $SCRATCH_MNT/mysnap1
+run_check $FSSUM_PROG -A -f -w $send_files_dir/1.fssum $SCRATCH_MNT/mysnap1
+_run_btrfs_util_prog send $SCRATCH_MNT/mysnap1 -f $send_files_dir/1.snap
+
+_scratch_unmount
+_check_scratch_fs
+
+_scratch_mkfs /dev/null 21
+_scratch_mount
+
+_run_btrfs_util_prog receive $SCRATCH_MNT -f $send_files_dir/1.snap
+run_check $FSSUM_PROG -r $send_files_dir/1.fssum $SCRATCH_MNT/mysnap1
+
+_check_scratch_fs
+
+status=0
+exit
diff --git a/tests/btrfs/050.out b/tests/btrfs/050.out
new file mode 100644
index 000..37f2cbc
--- /dev/null
+++ b/tests/btrfs/050.out
@@ -0,0 +1 @@
+QA output created by 050
diff --git a/tests/btrfs/group b/tests/btrfs/group
index 59b0c98..69a80e0 100644
--- a/tests/btrfs/group
+++ b/tests/btrfs/group
@@ -52,3 +52,4 @@
 047 auto quick
 048 auto quick
 049 auto quick
+050 auto
-- 
1.9.1

--
To unsubscribe from this list: send the line unsubscribe linux-btrfs in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH v3] Btrfs: fix hang on error (such as ENOSPC) when writing extent pages

2014-05-09 Thread Filipe David Borba Manana
When running low on available disk space and having several processes
doing buffered file IO, I got the following trace in dmesg:

[ 4202.720152] INFO: task kworker/u8:1:5450 blocked for more than 120 seconds.
[ 4202.720401]   Not tainted 3.13.0-fdm-btrfs-next-26+ #1
[ 4202.720596] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this 
message.
[ 4202.720874] kworker/u8:1D 0001 0  5450  2 0x
[ 4202.720904] Workqueue: btrfs-flush_delalloc normal_work_helper [btrfs]
[ 4202.720908]  8801f62ddc38 0082 880203ac2490 
001d3f40
[ 4202.720913]  8801f62ddfd8 001d3f40 8800c4f0c920 
880203ac2490
[ 4202.720918]  001d4a40 88020fe85a40 88020fe85ab8 
0001
[ 4202.720922] Call Trace:
[ 4202.720931]  [816a3cb9] schedule+0x29/0x70
[ 4202.720950]  [a01ec48d] btrfs_start_ordered_extent+0x6d/0x110 
[btrfs]
[ 4202.720956]  [8108e620] ? bit_waitqueue+0xc0/0xc0
[ 4202.720972]  [a01ec559] btrfs_run_ordered_extent_work+0x29/0x40 
[btrfs]
[ 4202.720988]  [a0201987] normal_work_helper+0x137/0x2c0 [btrfs]
[ 4202.720994]  [810680e5] process_one_work+0x1f5/0x530
(...)
[ 4202.721027] 2 locks held by kworker/u8:1/5450:
[ 4202.721028]  #0:  (%s-%s){..}, at: [81068083] 
process_one_work+0x193/0x530
[ 4202.721037]  #1:  ((work-normal_work)){+.+...}, at: [81068083] 
process_one_work+0x193/0x530
[ 4202.721054] INFO: task btrfs:7891 blocked for more than 120 seconds.
[ 4202.721258]   Not tainted 3.13.0-fdm-btrfs-next-26+ #1
[ 4202.721444] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this 
message.
[ 4202.721699] btrfs   D 0001 0  7891   7890 0x0001
[ 4202.721704]  88018c2119e8 0086 8800a33d2490 
001d3f40
[ 4202.721710]  88018c211fd8 001d3f40 8802144b 
8800a33d2490
[ 4202.721714]  8800d8576640 88020fe85bc0 88020fe85bc8 
7fff
[ 4202.721718] Call Trace:
[ 4202.721723]  [816a3cb9] schedule+0x29/0x70
[ 4202.721727]  [816a2ebc] schedule_timeout+0x1dc/0x270
[ 4202.721732]  [8109bd79] ? mark_held_locks+0xb9/0x140
[ 4202.721736]  [816a90c0] ? _raw_spin_unlock_irq+0x30/0x40
[ 4202.721740]  [8109bf0d] ? trace_hardirqs_on_caller+0x10d/0x1d0
[ 4202.721744]  [816a488f] wait_for_completion+0xdf/0x120
[ 4202.721749]  [8107fa90] ? try_to_wake_up+0x310/0x310
[ 4202.721765]  [a01ebee4] btrfs_wait_ordered_extents+0x1f4/0x280 
[btrfs]
[ 4202.721781]  [a020526e] btrfs_mksubvol.isra.62+0x30e/0x5a0 [btrfs]
[ 4202.721786]  [8108e620] ? bit_waitqueue+0xc0/0xc0
[ 4202.721799]  [a02056a9] 
btrfs_ioctl_snap_create_transid+0x1a9/0x1b0 [btrfs]
[ 4202.721813]  [a020583a] btrfs_ioctl_snap_create_v2+0x10a/0x170 
[btrfs]
(...)

It turns out that extent_io.c:__extent_writepage(), which ends up being called
through filemap_fdatawrite_range() in btrfs_start_ordered_extent(), was getting
-ENOSPC when calling the fill_delalloc callback. In this situation, it returned
without the writepage_end_io_hook callback (inode.c:btrfs_writepage_end_io_hook)
ever being called for the respective page, which prevents the ordered extent's
bytes_left count from ever reaching 0, and therefore a finish_ordered_fn work
is never queued into the endio_write_workers queue. This makes the task that
called btrfs_start_ordered_extent() hang forever on the wait queue of the 
ordered
extent.

This is fairly easy to reproduce using a small filesystem and fsstress on
a quad core vm:

mkfs.btrfs -f -b `expr 2100 \* 1024 \* 1024` /dev/sdd
mount /dev/sdd /mnt

fsstress -p 6 -d /mnt -n 10 -x \
btrfs subvolume snapshot -r /mnt /mnt/mysnap \
-f allocsp=0 \
-f bulkstat=0 \
-f bulkstat1=0 \
-f chown=0 \
-f creat=1 \
-f dread=0 \
-f dwrite=0 \
-f fallocate=1 \
-f fdatasync=0 \
-f fiemap=0 \
-f freesp=0 \
-f fsync=0 \
-f getattr=0 \
-f getdents=0 \
-f link=0 \
-f mkdir=0 \
-f mknod=0 \
-f punch=1 \
-f read=0 \
-f readlink=0 \
-f rename=0 \
-f resvsp=0 \
-f rmdir=0 \
-f setxattr=0 \
-f stat=0 \
-f symlink=0 \
-f sync=0 \
-f truncate=1 \
-f unlink=0 \
-f unresvsp=0 \
-f write=4

So just ensure that if an error happens while writing the extent page
we call the writepage_end_io_hook callback. Also make it return the
error code and ensure the caller (extent_write_cache_pages) processes
all pages in the page vector even if an error happens only for some
of them, so that ordered extents end up released.

Signed-off-by: Filipe David Borba Manana

[PATCH] Btrfs: fix hang on error (such as ENOSPC) when writing extent pages

2014-05-08 Thread Filipe David Borba Manana
When running low on available disk space and having several processes
doing buffered file IO, I got the following trace in dmesg:

[ 4202.720152] INFO: task kworker/u8:1:5450 blocked for more than 120 seconds.
[ 4202.720401]   Not tainted 3.13.0-fdm-btrfs-next-26+ #1
[ 4202.720596] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this 
message.
[ 4202.720874] kworker/u8:1D 0001 0  5450  2 0x
[ 4202.720904] Workqueue: btrfs-flush_delalloc normal_work_helper [btrfs]
[ 4202.720908]  8801f62ddc38 0082 880203ac2490 
001d3f40
[ 4202.720913]  8801f62ddfd8 001d3f40 8800c4f0c920 
880203ac2490
[ 4202.720918]  001d4a40 88020fe85a40 88020fe85ab8 
0001
[ 4202.720922] Call Trace:
[ 4202.720931]  [816a3cb9] schedule+0x29/0x70
[ 4202.720950]  [a01ec48d] btrfs_start_ordered_extent+0x6d/0x110 
[btrfs]
[ 4202.720956]  [8108e620] ? bit_waitqueue+0xc0/0xc0
[ 4202.720972]  [a01ec559] btrfs_run_ordered_extent_work+0x29/0x40 
[btrfs]
[ 4202.720988]  [a0201987] normal_work_helper+0x137/0x2c0 [btrfs]
[ 4202.720994]  [810680e5] process_one_work+0x1f5/0x530
(...)
[ 4202.721027] 2 locks held by kworker/u8:1/5450:
[ 4202.721028]  #0:  (%s-%s){..}, at: [81068083] 
process_one_work+0x193/0x530
[ 4202.721037]  #1:  ((work-normal_work)){+.+...}, at: [81068083] 
process_one_work+0x193/0x530
[ 4202.721054] INFO: task btrfs:7891 blocked for more than 120 seconds.
[ 4202.721258]   Not tainted 3.13.0-fdm-btrfs-next-26+ #1
[ 4202.721444] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this 
message.
[ 4202.721699] btrfs   D 0001 0  7891   7890 0x0001
[ 4202.721704]  88018c2119e8 0086 8800a33d2490 
001d3f40
[ 4202.721710]  88018c211fd8 001d3f40 8802144b 
8800a33d2490
[ 4202.721714]  8800d8576640 88020fe85bc0 88020fe85bc8 
7fff
[ 4202.721718] Call Trace:
[ 4202.721723]  [816a3cb9] schedule+0x29/0x70
[ 4202.721727]  [816a2ebc] schedule_timeout+0x1dc/0x270
[ 4202.721732]  [8109bd79] ? mark_held_locks+0xb9/0x140
[ 4202.721736]  [816a90c0] ? _raw_spin_unlock_irq+0x30/0x40
[ 4202.721740]  [8109bf0d] ? trace_hardirqs_on_caller+0x10d/0x1d0
[ 4202.721744]  [816a488f] wait_for_completion+0xdf/0x120
[ 4202.721749]  [8107fa90] ? try_to_wake_up+0x310/0x310
[ 4202.721765]  [a01ebee4] btrfs_wait_ordered_extents+0x1f4/0x280 
[btrfs]
[ 4202.721781]  [a020526e] btrfs_mksubvol.isra.62+0x30e/0x5a0 [btrfs]
[ 4202.721786]  [8108e620] ? bit_waitqueue+0xc0/0xc0
[ 4202.721799]  [a02056a9] 
btrfs_ioctl_snap_create_transid+0x1a9/0x1b0 [btrfs]
[ 4202.721813]  [a020583a] btrfs_ioctl_snap_create_v2+0x10a/0x170 
[btrfs]
(...)

It turns out that extent_io.c:__extent_writepage(), which ends up being called
through filemap_fdatawrite_range() in btrfs_start_ordered_extent(), was getting
-ENOSPC when calling the fill_delalloc callback. In this situation, it returned
without the writepage_end_io_hook callback (inode.c:btrfs_writepage_end_io_hook)
ever being called for the respective page, which prevents the ordered extent's
bytes_left count from ever reaching 0, and therefore a finish_ordered_fn work
is never queued into the endio_write_workers queue. This makes the task that
called btrfs_start_ordered_extent() hang forever on the wait queue of the 
ordered
extent.

This is fairly easy to reproduce using a small filesystem and fsstress on
a quad core vm:

mkfs.btrfs -f -b `expr 2100 \* 1024 \* 1024` /dev/sdd
mount /dev/sdd /mnt

fsstress -p 6 -d /mnt -n 10 -x \
btrfs subvolume snapshot -r /mnt /mnt/mysnap \
-f allocsp=0 \
-f bulkstat=0 \
-f bulkstat1=0 \
-f chown=0 \
-f creat=1 \
-f dread=0 \
-f dwrite=0 \
-f fallocate=1 \
-f fdatasync=0 \
-f fiemap=0 \
-f freesp=0 \
-f fsync=0 \
-f getattr=0 \
-f getdents=0 \
-f link=0 \
-f mkdir=0 \
-f mknod=0 \
-f punch=1 \
-f read=0 \
-f readlink=0 \
-f rename=0 \
-f resvsp=0 \
-f rmdir=0 \
-f setxattr=0 \
-f stat=0 \
-f symlink=0 \
-f sync=0 \
-f truncate=1 \
-f unlink=0 \
-f unresvsp=0 \
-f write=4

Signed-off-by: Filipe David Borba Manana fdman...@gmail.com
---
 fs/btrfs/extent_io.c | 6 +-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 0c43896..b5a097f 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -3374,9 +3374,13 @@ done:
 
 done_unlocked:
 
+   if (PageError(page

[PATCH v2] Btrfs: fix hang on error (such as ENOSPC) when writing extent pages

2014-05-08 Thread Filipe David Borba Manana
When running low on available disk space and having several processes
doing buffered file IO, I got the following trace in dmesg:

[ 4202.720152] INFO: task kworker/u8:1:5450 blocked for more than 120 seconds.
[ 4202.720401]   Not tainted 3.13.0-fdm-btrfs-next-26+ #1
[ 4202.720596] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this 
message.
[ 4202.720874] kworker/u8:1D 0001 0  5450  2 0x
[ 4202.720904] Workqueue: btrfs-flush_delalloc normal_work_helper [btrfs]
[ 4202.720908]  8801f62ddc38 0082 880203ac2490 
001d3f40
[ 4202.720913]  8801f62ddfd8 001d3f40 8800c4f0c920 
880203ac2490
[ 4202.720918]  001d4a40 88020fe85a40 88020fe85ab8 
0001
[ 4202.720922] Call Trace:
[ 4202.720931]  [816a3cb9] schedule+0x29/0x70
[ 4202.720950]  [a01ec48d] btrfs_start_ordered_extent+0x6d/0x110 
[btrfs]
[ 4202.720956]  [8108e620] ? bit_waitqueue+0xc0/0xc0
[ 4202.720972]  [a01ec559] btrfs_run_ordered_extent_work+0x29/0x40 
[btrfs]
[ 4202.720988]  [a0201987] normal_work_helper+0x137/0x2c0 [btrfs]
[ 4202.720994]  [810680e5] process_one_work+0x1f5/0x530
(...)
[ 4202.721027] 2 locks held by kworker/u8:1/5450:
[ 4202.721028]  #0:  (%s-%s){..}, at: [81068083] 
process_one_work+0x193/0x530
[ 4202.721037]  #1:  ((work-normal_work)){+.+...}, at: [81068083] 
process_one_work+0x193/0x530
[ 4202.721054] INFO: task btrfs:7891 blocked for more than 120 seconds.
[ 4202.721258]   Not tainted 3.13.0-fdm-btrfs-next-26+ #1
[ 4202.721444] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this 
message.
[ 4202.721699] btrfs   D 0001 0  7891   7890 0x0001
[ 4202.721704]  88018c2119e8 0086 8800a33d2490 
001d3f40
[ 4202.721710]  88018c211fd8 001d3f40 8802144b 
8800a33d2490
[ 4202.721714]  8800d8576640 88020fe85bc0 88020fe85bc8 
7fff
[ 4202.721718] Call Trace:
[ 4202.721723]  [816a3cb9] schedule+0x29/0x70
[ 4202.721727]  [816a2ebc] schedule_timeout+0x1dc/0x270
[ 4202.721732]  [8109bd79] ? mark_held_locks+0xb9/0x140
[ 4202.721736]  [816a90c0] ? _raw_spin_unlock_irq+0x30/0x40
[ 4202.721740]  [8109bf0d] ? trace_hardirqs_on_caller+0x10d/0x1d0
[ 4202.721744]  [816a488f] wait_for_completion+0xdf/0x120
[ 4202.721749]  [8107fa90] ? try_to_wake_up+0x310/0x310
[ 4202.721765]  [a01ebee4] btrfs_wait_ordered_extents+0x1f4/0x280 
[btrfs]
[ 4202.721781]  [a020526e] btrfs_mksubvol.isra.62+0x30e/0x5a0 [btrfs]
[ 4202.721786]  [8108e620] ? bit_waitqueue+0xc0/0xc0
[ 4202.721799]  [a02056a9] 
btrfs_ioctl_snap_create_transid+0x1a9/0x1b0 [btrfs]
[ 4202.721813]  [a020583a] btrfs_ioctl_snap_create_v2+0x10a/0x170 
[btrfs]
(...)

It turns out that extent_io.c:__extent_writepage(), which ends up being called
through filemap_fdatawrite_range() in btrfs_start_ordered_extent(), was getting
-ENOSPC when calling the fill_delalloc callback. In this situation, it returned
without the writepage_end_io_hook callback (inode.c:btrfs_writepage_end_io_hook)
ever being called for the respective page, which prevents the ordered extent's
bytes_left count from ever reaching 0, and therefore a finish_ordered_fn work
is never queued into the endio_write_workers queue. This makes the task that
called btrfs_start_ordered_extent() hang forever on the wait queue of the 
ordered
extent.

This is fairly easy to reproduce using a small filesystem and fsstress on
a quad core vm:

mkfs.btrfs -f -b `expr 2100 \* 1024 \* 1024` /dev/sdd
mount /dev/sdd /mnt

fsstress -p 6 -d /mnt -n 10 -x \
btrfs subvolume snapshot -r /mnt /mnt/mysnap \
-f allocsp=0 \
-f bulkstat=0 \
-f bulkstat1=0 \
-f chown=0 \
-f creat=1 \
-f dread=0 \
-f dwrite=0 \
-f fallocate=1 \
-f fdatasync=0 \
-f fiemap=0 \
-f freesp=0 \
-f fsync=0 \
-f getattr=0 \
-f getdents=0 \
-f link=0 \
-f mkdir=0 \
-f mknod=0 \
-f punch=1 \
-f read=0 \
-f readlink=0 \
-f rename=0 \
-f resvsp=0 \
-f rmdir=0 \
-f setxattr=0 \
-f stat=0 \
-f symlink=0 \
-f sync=0 \
-f truncate=1 \
-f unlink=0 \
-f unresvsp=0 \
-f write=4

Signed-off-by: Filipe David Borba Manana fdman...@gmail.com
---

V2: Grab error from `em` pointer if available, do the error check and
end_extent_writepage call before unlocking the page (just like
end_bio_extent_writepage does).

 fs/btrfs/extent_io.c | 7 ++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/fs/btrfs/extent_io.c b/fs/btrfs

[PATCH] Btrfs-progs: check, fix csum check in the presence of non-inlined refs

2014-05-07 Thread Filipe David Borba Manana
When we have non-inlined extent references, we were failing to find the
corresponding extent item for an existing csum item in the csum tree.

Reproducer:

   mkfs.btrfs -f /dev/sdd
   mount /dev/sdd /mnt

   xfs_io -f -c "falloc 780366 135302" /mnt/foo
   xfs_io -c "falloc 327680 151552" /mnt/foo
   xfs_io -c "pwrite -S 0xff -b 131072 0 131072" /mnt/foo
   sync

   for i in `seq 1 40`; do btrfs subvolume snapshot /mnt /mnt/snap$i ; done
   umount /mnt

   btrfs check /dev/sdd

The check command exited with status 1 and the following output:

   Checking filesystem on /dev/sdd
   UUID: 2416ab5f-9d71-457e-bb13-a27d4f6b399a
   checking extents
   checking free space cache
   checking fs roots
   checking csums
   There are no extents for csum range 12980224-12984320
   Csum exists for 12980224-12984320 but there is no extent record
   found 1388544 bytes used err is 1
   total csum bytes: 132
   total tree bytes: 704512
   total fs tree bytes: 573440
   total extent tree bytes: 16384
   btree space waste bytes: 564479
   file data blocks allocated: 19341312
referenced 14606336
   Btrfs v3.14.1-94-g80597e7

After this change it no longer erroneously reports a missing extent for the
csum item and exits with a status of 0.

Also added missing btrfs_prev_leaf() return value checks, as we were ignoring
errors and non-existence of left siblings completely.

Signed-off-by: Filipe David Borba Manana fdman...@gmail.com
---
 cmds-check.c | 38 +++---
 1 file changed, 27 insertions(+), 11 deletions(-)

diff --git a/cmds-check.c b/cmds-check.c
index 103efc5..18612c8 100644
--- a/cmds-check.c
+++ b/cmds-check.c
@@ -3650,8 +3650,7 @@ static int check_extent_exists(struct btrfs_root *root, 
u64 bytenr,
 
key.objectid = bytenr;
key.type = BTRFS_EXTENT_ITEM_KEY;
-   key.offset = 0;
-
+   key.offset = (u64)-1;
 
 again:
ret = btrfs_search_slot(NULL, root-fs_info-extent_root, key, path,
@@ -3661,10 +3660,17 @@ again:
btrfs_free_path(path);
return ret;
} else if (ret) {
-   if (path-slots[0])
+   if (path-slots[0]  0) {
path-slots[0]--;
-   else
-   btrfs_prev_leaf(root, path);
+   } else {
+   ret = btrfs_prev_leaf(root, path);
+   if (ret  0) {
+   goto out;
+   } else if (ret  0) {
+   ret = 0;
+   goto out;
+   }
+   }
}
 
btrfs_item_key_to_cpu(path-nodes[0], key, path-slots[0]);
@@ -3674,13 +3680,22 @@ again:
 * bytenr, so walk back one more just in case.  Dear future traveler,
 * first congrats on mastering time travel.  Now if it's not too much
 * trouble could you go back to 2006 and tell Chris to make the
-* BLOCK_GROUP_ITEM_KEY lower than the EXTENT_ITEM_KEY please?
+* BLOCK_GROUP_ITEM_KEY (and BTRFS_*_REF_KEY) lower than the
+* EXTENT_ITEM_KEY please?
 */
-   if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
-   if (path-slots[0])
+   while (key.type  BTRFS_EXTENT_ITEM_KEY) {
+   if (path-slots[0]  0) {
path-slots[0]--;
-   else
-   btrfs_prev_leaf(root, path);
+   } else {
+   ret = btrfs_prev_leaf(root, path);
+   if (ret  0) {
+   goto out;
+   } else if (ret  0) {
+   ret = 0;
+   goto out;
+   }
+   }
+   btrfs_item_key_to_cpu(path-nodes[0], key, path-slots[0]);
}
 
while (num_bytes) {
@@ -3752,7 +3767,8 @@ again:
}
ret = 0;
 
-   if (num_bytes) {
+out:
+   if (num_bytes  !ret) {
fprintf(stderr, There are no extents for csum range 
%Lu-%Lu\n, bytenr, bytenr+num_bytes);
ret = 1;
-- 
1.9.1

--
To unsubscribe from this list: send the line unsubscribe linux-btrfs in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v2] Btrfs: fix leaf corruption caused by ENOSPC while hole punching

2014-04-29 Thread Filipe David Borba Manana
 my other patch to perform the leaf sanity checks once a leaf is marked
as dirty (if the integrity checker is enabled), it would have been much harder
to debug this issue.

This change might fix a few similar issues reported by users in the mailing
list regarding assertion failures in btrfs_set_item_key_safe calls performed
by __btrfs_drop_extents, such as the following report:

http://comments.gmane.org/gmane.comp.file-systems.btrfs/32938

Asking fill_holes() to create a 0 bytes wide file hole item also produced the
first warning in the trace above, as we passed a range to 
btrfs_drop_extent_cache
that has an end smaller (by -1) than its start.

On 3.14 kernels this issue manifests itself through leaf corruption, as we get
duplicated file extent item keys in a leaf when calling
setup_items_for_insert(). On older kernels, setup_items_for_insert() isn't
called by __btrfs_drop_extents(); instead we have callers of
__btrfs_drop_extents(), namely the functions inode.c:insert_inline_extent()
and inode.c:insert_reserved_file_extent(), calling btrfs_insert_empty_item()
to insert the new file extent item, which would fail with error -EEXIST
instead of inserting a duplicated key - which is still a serious issue as it
would make all similar file extent item replace operations keep failing if
they target the same file range.

Cc: sta...@vger.kernel.org
Signed-off-by: Filipe David Borba Manana fdman...@gmail.com
---

V2: Updated commit message to mention difference between 3.14 kernels and older
releases and cc'ed stable. Made the logic in __btrfs_drop_extents simpler
and made it remove any 0 bytes file extent item within the target range, and
not only extent items that have an offset matching search_start.

 fs/btrfs/file.c | 20 +++-
 1 file changed, 19 insertions(+), 1 deletion(-)

diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 49e5fbf..7c3c84f 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -785,6 +785,18 @@ next_slot:
extent_end = search_start;
}
 
+   /*
+* Don't skip extent items representing 0 byte lengths. They
+* used to be created (bug) if while punching holes we hit
+* -ENOSPC condition. So if we find one here, just ensure we
+* delete it, otherwise we would insert a new file extent item
+* with the same key (offset) as that 0 bytes length file
+* extent item in the call to setup_items_for_insert() later
+* in this function.
+*/
+   if (extent_end == key.offset  extent_end = search_start)
+   goto delete_extent_item;
+
if (extent_end = search_start) {
path-slots[0]++;
goto next_slot;
@@ -898,6 +910,7 @@ next_slot:
 *| -- extent -- |
 */
if (start = key.offset  end = extent_end) {
+delete_extent_item:
if (del_nr == 0) {
del_slot = path-slots[0];
del_nr = 1;
@@ -2353,7 +2366,12 @@ static int btrfs_punch_hole(struct inode *inode, loff_t 
offset, loff_t len)
}
 
trans-block_rsv = root-fs_info-trans_block_rsv;
-   if (cur_offset  ino_size) {
+   /*
+* Don't insert file hole extent item if it's for a range beyond eof
+* (because it's useless) or if it represents a 0 bytes range (when
+* cur_offset == drop_end).
+*/
+   if (cur_offset  ino_size  cur_offset  drop_end) {
ret = fill_holes(trans, inode, path, cur_offset, drop_end);
if (ret) {
err = ret;
-- 
1.9.1

--
To unsubscribe from this list: send the line unsubscribe linux-btrfs in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v2] Btrfs: implement inode_operations callback tmpfile

2014-04-27 Thread Filipe David Borba Manana
This implements the tmpfile callback of struct inode_operations, introduced
in the linux kernel 3.11, and implemented already by some filesystems. This
callback is invoked by the VFS when the flag O_TMPFILE is passed to the open
system call.

Signed-off-by: Filipe David Borba Manana fdman...@gmail.com
---

V2: Updated change log and comment about how many metadata units are needed
for the transaction.
Left the ACL inheritance in the callback (like ext4 does) since the thread
in linux-fsdevel seems to have ended with the conclusion that this is the
right behaviour (as Andreas Gruenbacher says).

 fs/btrfs/inode.c | 118 +--
 1 file changed, 98 insertions(+), 20 deletions(-)

diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 0c0bb45..b5397db 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -5549,6 +5549,7 @@ static struct inode *btrfs_new_inode(struct 
btrfs_trans_handle *trans,
struct btrfs_inode_ref *ref;
struct btrfs_key key[2];
u32 sizes[2];
+   int nitems = name ? 2 : 1;
unsigned long ptr;
int ret;
 
@@ -5568,7 +5569,7 @@ static struct inode *btrfs_new_inode(struct 
btrfs_trans_handle *trans,
 */
inode-i_ino = objectid;
 
-   if (dir) {
+   if (dir  name) {
trace_btrfs_inode_request(dir);
 
ret = btrfs_set_inode_index(dir, index);
@@ -5577,6 +5578,8 @@ static struct inode *btrfs_new_inode(struct 
btrfs_trans_handle *trans,
iput(inode);
return ERR_PTR(ret);
}
+   } else if (dir) {
+   *index = 0;
}
/*
 * index_cnt is ignored for everything but a dir,
@@ -5601,21 +5604,24 @@ static struct inode *btrfs_new_inode(struct 
btrfs_trans_handle *trans,
btrfs_set_key_type(key[0], BTRFS_INODE_ITEM_KEY);
key[0].offset = 0;
 
-   /*
-* Start new inodes with an inode_ref. This is slightly more
-* efficient for small numbers of hard links since they will
-* be packed into one item. Extended refs will kick in if we
-* add more hard links than can fit in the ref item.
-*/
-   key[1].objectid = objectid;
-   btrfs_set_key_type(key[1], BTRFS_INODE_REF_KEY);
-   key[1].offset = ref_objectid;
-
sizes[0] = sizeof(struct btrfs_inode_item);
-   sizes[1] = name_len + sizeof(*ref);
+
+   if (name) {
+   /*
+* Start new inodes with an inode_ref. This is slightly more
+* efficient for small numbers of hard links since they will
+* be packed into one item. Extended refs will kick in if we
+* add more hard links than can fit in the ref item.
+*/
+   key[1].objectid = objectid;
+   btrfs_set_key_type(key[1], BTRFS_INODE_REF_KEY);
+   key[1].offset = ref_objectid;
+
+   sizes[1] = name_len + sizeof(*ref);
+   }
 
path-leave_spinning = 1;
-   ret = btrfs_insert_empty_items(trans, root, path, key, sizes, 2);
+   ret = btrfs_insert_empty_items(trans, root, path, key, sizes, nitems);
if (ret != 0)
goto fail;
 
@@ -5628,12 +5634,14 @@ static struct inode *btrfs_new_inode(struct 
btrfs_trans_handle *trans,
 sizeof(*inode_item));
fill_inode_item(trans, path-nodes[0], inode_item, inode);
 
-   ref = btrfs_item_ptr(path-nodes[0], path-slots[0] + 1,
-struct btrfs_inode_ref);
-   btrfs_set_inode_ref_name_len(path-nodes[0], ref, name_len);
-   btrfs_set_inode_ref_index(path-nodes[0], ref, *index);
-   ptr = (unsigned long)(ref + 1);
-   write_extent_buffer(path-nodes[0], name, ptr, name_len);
+   if (name) {
+   ref = btrfs_item_ptr(path-nodes[0], path-slots[0] + 1,
+struct btrfs_inode_ref);
+   btrfs_set_inode_ref_name_len(path-nodes[0], ref, name_len);
+   btrfs_set_inode_ref_index(path-nodes[0], ref, *index);
+   ptr = (unsigned long)(ref + 1);
+   write_extent_buffer(path-nodes[0], name, ptr, name_len);
+   }
 
btrfs_mark_buffer_dirty(path-nodes[0]);
btrfs_free_path(path);
@@ -5669,7 +5677,7 @@ static struct inode *btrfs_new_inode(struct 
btrfs_trans_handle *trans,
 
return inode;
 fail:
-   if (dir)
+   if (dir  name)
BTRFS_I(dir)-index_cnt--;
btrfs_free_path(path);
iput(inode);
@@ -5954,6 +5962,15 @@ static int btrfs_link(struct dentry *old_dentry, struct 
inode *dir,
err = btrfs_update_inode(trans, root, inode);
if (err)
goto fail;
+   if (inode-i_nlink == 1) {
+   /*
+* If new hard link count is 1, it's a file created

[PATCH] Btrfs: fix leaf corruption caused by ENOSPC while hole punching

2014-04-27 Thread Filipe David Borba Manana
 my other patch to perform the leaf sanity checks once a leaf is marked
as dirty (if the integrity checker is enabled), it would have been much harder
to debug this issue.

This change might fix a few similar issues reported by users in the mailing
list regarding assertion failures in btrfs_set_item_key_safe calls performed
by __btrfs_drop_extents, such as the following report:

http://comments.gmane.org/gmane.comp.file-systems.btrfs/32938

Asking fill_holes() to create a 0 bytes wide file hole item also produced the
first warning in the trace above, as we passed a range to 
btrfs_drop_extent_cache
that has an end smaller (by -1) than its start.

Signed-off-by: Filipe David Borba Manana fdman...@gmail.com
---
 fs/btrfs/file.c | 19 +--
 1 file changed, 17 insertions(+), 2 deletions(-)

diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 49e5fbf..cac902a 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -785,7 +785,17 @@ next_slot:
extent_end = search_start;
}
 
-   if (extent_end = search_start) {
+   /*
+* Don't skip extent items representing 0 byte lengths. They
+* used to be created (bug) if while punching holes we hit
+* -ENOSPC condition. So if we find one here, just ensure we
+* delete it, otherwise we would insert a new file extent item
+* with the same key (offset) as that 0 bytes length file
+* extent item in the call to setup_items_for_insert() later
+* in this function.
+*/
+   if (extent_end = search_start 
+   !(extent_end == key.offset  extent_end == search_start)) {
path-slots[0]++;
goto next_slot;
}
@@ -2353,7 +2363,12 @@ static int btrfs_punch_hole(struct inode *inode, loff_t 
offset, loff_t len)
}
 
trans-block_rsv = root-fs_info-trans_block_rsv;
-   if (cur_offset  ino_size) {
+   /*
+* Don't insert file hole extent item if it's for a range beyond eof
+* (because it's useless) or if it represents a 0 bytes range (when
+* cur_offset == drop_end).
+*/
+   if (cur_offset  ino_size  cur_offset  drop_end) {
ret = fill_holes(trans, inode, path, cur_offset, drop_end);
if (ret) {
err = ret;
-- 
1.9.1

--
To unsubscribe from this list: send the line unsubscribe linux-btrfs in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH] Btrfs: read inode size after acquiring the mutex when punching a hole

2014-04-25 Thread Filipe David Borba Manana
In a previous change, commit 12870f1c9b2de7d475d22e73fd7db1b418599725,
I accidentally moved the roundup of inode->i_size to outside of the
critical section delimited by the inode mutex, which is not atomic and
not correct since the size can be changed by another task before we acquire
the mutex. Therefore fix it.

Signed-off-by: Filipe David Borba Manana fdman...@gmail.com
---
 fs/btrfs/file.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 23f6a9d..efaad37 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -2192,13 +2192,14 @@ static int btrfs_punch_hole(struct inode *inode, loff_t 
offset, loff_t len)
bool same_page = ((offset  PAGE_CACHE_SHIFT) ==
  ((offset + len - 1)  PAGE_CACHE_SHIFT));
bool no_holes = btrfs_fs_incompat(root-fs_info, NO_HOLES);
-   u64 ino_size = round_up(inode-i_size, PAGE_CACHE_SIZE);
+   u64 ino_size;
 
ret = btrfs_wait_ordered_range(inode, offset, len);
if (ret)
return ret;
 
mutex_lock(inode-i_mutex);
+   ino_size = round_up(inode-i_size, PAGE_CACHE_SIZE);
/*
 * We needn't truncate any page which is beyond the end of the file
 * because we are sure there is no data there.
-- 
1.9.1

--
To unsubscribe from this list: send the line unsubscribe linux-btrfs in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH] Btrfs: correctly set profile flags on seqlock retry

2014-04-24 Thread Filipe David Borba Manana
If we had to retry on the profiles seqlock (due to a concurrent write), we
would set bits on the input flags that corresponded both to the current
profile and to previous values of the profile.

Signed-off-by: Filipe David Borba Manana fdman...@gmail.com
---
 fs/btrfs/extent-tree.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 678cb35..5590af9 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -3543,11 +3543,13 @@ static u64 btrfs_reduce_alloc_profile(struct btrfs_root 
*root, u64 flags)
return extended_to_chunk(flags | tmp);
 }
 
-static u64 get_alloc_profile(struct btrfs_root *root, u64 flags)
+static u64 get_alloc_profile(struct btrfs_root *root, u64 orig_flags)
 {
unsigned seq;
+   u64 flags;
 
do {
+   flags = orig_flags;
seq = read_seqbegin(root-fs_info-profiles_lock);
 
if (flags  BTRFS_BLOCK_GROUP_DATA)
-- 
1.9.1

--
To unsubscribe from this list: send the line unsubscribe linux-btrfs in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH] Btrfs: use correct key when repeating search for extent item

2014-04-24 Thread Filipe David Borba Manana
If skinny metadata is enabled and our first tree search fails to find a
skinny extent item, we may repeat a tree search for a fat extent item
(if the previous item in the leaf is not the fat extent we're looking
for). However we were not setting the new key's objectid to the right
value, as we previously used the same key variable to peek at the previous
item in the leaf, which has a different objectid. So just set the right
objectid to avoid modifying/deleting a wrong item if we repeat the tree
search.

Signed-off-by: Filipe David Borba Manana fdman...@gmail.com
---
 fs/btrfs/extent-tree.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 1306487..678cb35 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -1542,6 +1542,7 @@ again:
ret = 0;
}
if (ret) {
+   key.objectid = bytenr;
key.type = BTRFS_EXTENT_ITEM_KEY;
key.offset = num_bytes;
btrfs_release_path(path);
@@ -5719,6 +5720,7 @@ static int __btrfs_free_extent(struct btrfs_trans_handle 
*trans,
 
if (ret  0  skinny_metadata) {
skinny_metadata = false;
+   key.objectid = bytenr;
key.type = BTRFS_EXTENT_ITEM_KEY;
key.offset = num_bytes;
btrfs_release_path(path);
-- 
1.9.1

--
To unsubscribe from this list: send the line unsubscribe linux-btrfs in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 1/6 v3] Btrfs: send, bump stream version

2014-04-20 Thread Filipe David Borba Manana
This increases the send stream version from version 1 to version 2, adding
new commands:

1) total data size - used to tell the receiver how much file data the stream
   will add or update;

2) fallocate - used to pre-allocate space for files and to punch holes in files;

3) inode set flags;

4) set inode otime.

This is preparation work for subsequent changes that implement the new features.

A version 2 stream is only produced if the send ioctl caller passes in one of 
the
new flags (BTRFS_SEND_FLAG_CALCULATE_DATA_SIZE | BTRFS_SEND_FLAG_STREAM_V2), 
meaning
old clients are unaffected.

Signed-off-by: Filipe David Borba Manana fdman...@gmail.com
---

V2: A v2 stream is now only produced if the send ioctl caller passes in one of
the new flags (BTRFS_SEND_FLAG_CALCULATE_DATA_SIZE | 
BTRFS_SEND_FLAG_SUPPORT_FALLOCATE)
to avoid breaking old clients.
V3: Removed BTRFS_SEND_FLAG_SUPPORT_FALLOCATE and added 
BTRFS_SEND_FLAG_STREAM_V2,
added commands for inode set flags and otime.

 fs/btrfs/send.c|  7 ++-
 fs/btrfs/send.h| 21 -
 include/uapi/linux/btrfs.h | 21 -
 3 files changed, 46 insertions(+), 3 deletions(-)

diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c
index 289e9f3..7b4b0c3 100644
--- a/fs/btrfs/send.c
+++ b/fs/btrfs/send.c
@@ -632,7 +632,10 @@ static int send_header(struct send_ctx *sctx)
struct btrfs_stream_header hdr;
 
strcpy(hdr.magic, BTRFS_SEND_STREAM_MAGIC);
-   hdr.version = cpu_to_le32(BTRFS_SEND_STREAM_VERSION);
+   if (sctx-flags  BTRFS_SEND_FLAG_STREAM_V2)
+   hdr.version = cpu_to_le32(BTRFS_SEND_STREAM_VERSION_2);
+   else
+   hdr.version = cpu_to_le32(BTRFS_SEND_STREAM_VERSION_1);
 
return write_buf(sctx-send_filp, hdr, sizeof(hdr),
sctx-send_off);
@@ -5554,6 +5557,8 @@ long btrfs_ioctl_send(struct file *mnt_file, void __user 
*arg_)
INIT_LIST_HEAD(sctx-name_cache_list);
 
sctx-flags = arg-flags;
+   if (sctx-flags  BTRFS_SEND_FLAG_CALCULATE_DATA_SIZE)
+   sctx-flags |= BTRFS_SEND_FLAG_STREAM_V2;
 
sctx-send_filp = fget(arg-send_fd);
if (!sctx-send_filp) {
diff --git a/fs/btrfs/send.h b/fs/btrfs/send.h
index 48d425a..96f583c 100644
--- a/fs/btrfs/send.h
+++ b/fs/btrfs/send.h
@@ -20,7 +20,8 @@
 #include ctree.h
 
 #define BTRFS_SEND_STREAM_MAGIC btrfs-stream
-#define BTRFS_SEND_STREAM_VERSION 1
+#define BTRFS_SEND_STREAM_VERSION_1 1
+#define BTRFS_SEND_STREAM_VERSION_2 2
 
 #define BTRFS_SEND_BUF_SIZE (1024 * 64)
 #define BTRFS_SEND_READ_SIZE (1024 * 48)
@@ -87,6 +88,15 @@ enum btrfs_send_cmd {
 
BTRFS_SEND_C_END,
BTRFS_SEND_C_UPDATE_EXTENT,
+
+   /*
+* The following commands were added in stream version 2.
+*/
+   BTRFS_SEND_C_TOTAL_DATA_SIZE,
+   BTRFS_SEND_C_FALLOCATE,
+   BTRFS_SEND_C_INODE_SET_FLAGS,
+   BTRFS_SEND_C_UTIMES2, /* Same as UTIMES, but it includes OTIME too. */
+
__BTRFS_SEND_C_MAX,
 };
 #define BTRFS_SEND_C_MAX (__BTRFS_SEND_C_MAX - 1)
@@ -125,10 +135,19 @@ enum {
BTRFS_SEND_A_CLONE_OFFSET,
BTRFS_SEND_A_CLONE_LEN,
 
+   /*
+* The following attributes were added in stream version 2.
+*/
+   BTRFS_SEND_A_FALLOCATE_FLAGS, /* 32 bits */
+   BTRFS_SEND_A_INODE_FLAGS, /* 32 bits */
+
__BTRFS_SEND_A_MAX,
 };
 #define BTRFS_SEND_A_MAX (__BTRFS_SEND_A_MAX - 1)
 
+#define BTRFS_SEND_A_FALLOCATE_FLAG_KEEP_SIZE   (1  0)
+#define BTRFS_SEND_A_FALLOCATE_FLAG_PUNCH_HOLE  (1  1)
+
 #ifdef __KERNEL__
 long btrfs_ioctl_send(struct file *mnt_file, void __user *arg);
 #endif
diff --git a/include/uapi/linux/btrfs.h b/include/uapi/linux/btrfs.h
index b4d6909..8ab2761 100644
--- a/include/uapi/linux/btrfs.h
+++ b/include/uapi/linux/btrfs.h
@@ -464,10 +464,29 @@ struct btrfs_ioctl_received_subvol_args {
  */
 #define BTRFS_SEND_FLAG_OMIT_END_CMD   0x4
 
+/*
+ * Calculate the amount (in bytes) of new file data between the send and
+ * parent snapshots, or in case of a full send, the total amount of file data
+ * we will send.
+ * This corresponds to the sum of the data lengths of each write, clone and
+ * fallocate commands that are sent through the send stream. The receiving end
+ * can use this information to compute progress.
+ *
+ * Added in send stream version 2, and implies producing a version 2 stream.
+ */
+#define BTRFS_SEND_FLAG_CALCULATE_DATA_SIZE0x8
+
+/*
+ * Used by a client to request a version 2 of the send stream.
+ */
+#define BTRFS_SEND_FLAG_STREAM_V2  0x10
+
 #define BTRFS_SEND_FLAG_MASK \
(BTRFS_SEND_FLAG_NO_FILE_DATA | \
 BTRFS_SEND_FLAG_OMIT_STREAM_HEADER | \
-BTRFS_SEND_FLAG_OMIT_END_CMD)
+BTRFS_SEND_FLAG_OMIT_END_CMD | \
+BTRFS_SEND_FLAG_CALCULATE_DATA_SIZE | \
+BTRFS_SEND_FLAG_STREAM_V2)
 
 struct btrfs_ioctl_send_args {
__s64 send_fd

[PATCH 2/6 v3] Btrfs: send, implement total data size command to allow for progress estimation

2014-04-20 Thread Filipe David Borba Manana
This new send flag makes send first calculate the amount of new file data (in
bytes) the send root has relative to the parent root, or for the case of a
non-incremental send, the total amount of file data the stream will create
(including holes and prealloc extents). In other words, it computes the sum of
the lengths of all write, clone and fallocate operations that will be sent
through the send stream.

This data size value is sent in a new command, named 
BTRFS_SEND_C_TOTAL_DATA_SIZE, that
immediately follows a BTRFS_SEND_C_SUBVOL or BTRFS_SEND_C_SNAPSHOT command, and 
precedes
any command that changes a file or the filesystem hierarchy. Upon receiving a 
write, clone
or fallocate command, the receiving end can increment a counter by the data 
length of that
command and therefore report progress by comparing the counter's value with the 
data size
value received in the BTRFS_SEND_C_TOTAL_DATA_SIZE command.

The approach is simple, before the normal operation of send, do a scan in the 
file system
tree for new inodes and new/changed file extent items, just like in send's 
normal operation,
and keep incrementing a counter with new inodes' size and the size of file 
extents (and file
holes)  that are going to be written, cloned or fallocated. This is actually a 
simpler and
more lightweight tree scan/processing than the one we do when sending the 
changes, as it
doesn't process inode references nor does any lookups in the extent tree for 
example.

After modifying btrfs-progs to understand this new command and report progress, 
here's an
example (the -o flag tells btrfs send to pass the new flag to the kernel's send 
ioctl):

$ btrfs send -s --stream-version 2 /mnt/sdd/snap_base | btrfs receive 
/mnt/sdc
At subvol /mnt/sdd/snap_base
At subvol snap_base
About to receive 9212392667 bytes
Subvolume /mnt/sdc//snap_base, 4059722426 / 9212392667 bytes received, 
44.07%, 40.32MB/s

$ btrfs send -s --stream-version 2 -p /mnt/sdd/snap_base /mnt/sdd/snap_incr 
| btrfs receive /mnt/sdc
At subvol /mnt/sdd/snap_incr
At subvol snap_incr
About to receive 9571342213 bytes
Subvolume /mnt/sdc//snap_incr, 6557345221 / 9571342213 bytes received, 
68.51%, 51.04MB/s

Signed-off-by: Filipe David Borba Manana fdman...@gmail.com
---

V2: A v2 stream is now only produced if the send ioctl caller passes in one of
the new flags (BTRFS_SEND_FLAG_CALCULATE_DATA_SIZE | 
BTRFS_SEND_FLAG_SUPPORT_FALLOCATE)
to avoid breaking old clients.
V3: Removed BTRFS_SEND_FLAG_SUPPORT_FALLOCATE and added 
BTRFS_SEND_FLAG_STREAM_V2,
added commands for inode set flags and otime.

 fs/btrfs/send.c | 194 ++--
 1 file changed, 162 insertions(+), 32 deletions(-)

diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c
index 7b4b0c3..2a52cc9 100644
--- a/fs/btrfs/send.c
+++ b/fs/btrfs/send.c
@@ -81,7 +81,13 @@ struct clone_root {
 #define SEND_CTX_MAX_NAME_CACHE_SIZE 128
 #define SEND_CTX_NAME_CACHE_CLEAN_SIZE (SEND_CTX_MAX_NAME_CACHE_SIZE * 2)
 
+enum btrfs_send_phase {
+   SEND_PHASE_STREAM_CHANGES,
+   SEND_PHASE_COMPUTE_DATA_SIZE,
+};
+
 struct send_ctx {
+   enum btrfs_send_phase phase;
struct file *send_filp;
loff_t send_off;
char *send_buf;
@@ -116,6 +122,7 @@ struct send_ctx {
u64 cur_inode_last_extent;
 
u64 send_progress;
+   u64 total_data_size;
 
struct list_head new_refs;
struct list_head deleted_refs;
@@ -691,6 +698,8 @@ static int send_rename(struct send_ctx *sctx,
 {
int ret;
 
+   ASSERT(sctx-phase != SEND_PHASE_COMPUTE_DATA_SIZE);
+
 verbose_printk(btrfs: send_rename %s - %s\n, from-start, to-start);
 
ret = begin_cmd(sctx, BTRFS_SEND_C_RENAME);
@@ -715,6 +724,8 @@ static int send_link(struct send_ctx *sctx,
 {
int ret;
 
+   ASSERT(sctx-phase != SEND_PHASE_COMPUTE_DATA_SIZE);
+
 verbose_printk(btrfs: send_link %s - %s\n, path-start, lnk-start);
 
ret = begin_cmd(sctx, BTRFS_SEND_C_LINK);
@@ -738,6 +749,8 @@ static int send_unlink(struct send_ctx *sctx, struct 
fs_path *path)
 {
int ret;
 
+   ASSERT(sctx-phase != SEND_PHASE_COMPUTE_DATA_SIZE);
+
 verbose_printk(btrfs: send_unlink %s\n, path-start);
 
ret = begin_cmd(sctx, BTRFS_SEND_C_UNLINK);
@@ -760,6 +773,8 @@ static int send_rmdir(struct send_ctx *sctx, struct fs_path 
*path)
 {
int ret;
 
+   ASSERT(sctx-phase != SEND_PHASE_COMPUTE_DATA_SIZE);
+
 verbose_printk(btrfs: send_rmdir %s\n, path-start);
 
ret = begin_cmd(sctx, BTRFS_SEND_C_RMDIR);
@@ -2307,6 +2322,9 @@ static int send_truncate(struct send_ctx *sctx, u64 ino, 
u64 gen, u64 size)
int ret = 0;
struct fs_path *p;
 
+   if (sctx-phase == SEND_PHASE_COMPUTE_DATA_SIZE)
+   return 0;
+
 verbose_printk(btrfs: send_truncate %llu size=%llu\n, ino, size);
 
p = fs_path_alloc();
@@ -2336,6 +2354,8 @@ static int send_chmod(struct send_ctx *sctx, u64

[PATCH 3/6 v4] Btrfs: send, use fallocate command to punch holes

2014-04-20 Thread Filipe David Borba Manana
Instead of sending a write command with a data buffer filled with 0 value bytes,
use the fallocate command, introduced in the send stream version 2, to tell the
receiver to punch a file hole using the fallocate system call.

Signed-off-by: Filipe David Borba Manana fdman...@gmail.com
---

V2: A v2 stream is now only produced if the send ioctl caller passes in one of
the new flags (BTRFS_SEND_FLAG_CALCULATE_DATA_SIZE | 
BTRFS_SEND_FLAG_SUPPORT_FALLOCATE)
to avoid breaking old clients.
V3: Added missing path allocation, messed up rebase.
V4: Removed BTRFS_SEND_FLAG_SUPPORT_FALLOCATE and added 
BTRFS_SEND_FLAG_STREAM_V2,
added commands for inode set flags and otime.

 fs/btrfs/send.c | 55 ---
 fs/btrfs/send.h |  4 
 2 files changed, 56 insertions(+), 3 deletions(-)

diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c
index 2a52cc9..e57000b 100644
--- a/fs/btrfs/send.c
+++ b/fs/btrfs/send.c
@@ -564,6 +564,7 @@ static int tlv_put(struct send_ctx *sctx, u16 attr, const 
void *data, int len)
return tlv_put(sctx, attr, __tmp, sizeof(__tmp));  \
}
 
+TLV_PUT_DEFINE_INT(32)
 TLV_PUT_DEFINE_INT(64)
 
 static int tlv_put_string(struct send_ctx *sctx, u16 attr,
@@ -4482,18 +4483,59 @@ out:
return ret;
 }
 
+static int send_fallocate(struct send_ctx *sctx, u32 flags,
+ u64 offset, u64 len)
+{
+   struct fs_path *p = NULL;
+   int ret = 0;
+
+   ASSERT(sctx-flags  BTRFS_SEND_FLAG_STREAM_V2);
+
+   if (sctx-phase == SEND_PHASE_COMPUTE_DATA_SIZE) {
+   sctx-total_data_size += len;
+   return 0;
+   }
+
+   p = fs_path_alloc();
+   if (!p)
+   return -ENOMEM;
+   ret = get_cur_path(sctx, sctx-cur_ino, sctx-cur_inode_gen, p);
+   if (ret  0)
+   goto out;
+
+   ret = begin_cmd(sctx, BTRFS_SEND_C_FALLOCATE);
+   if (ret  0)
+   goto out;
+   TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p);
+   TLV_PUT_U32(sctx, BTRFS_SEND_A_FALLOCATE_FLAGS, flags);
+   TLV_PUT_U64(sctx, BTRFS_SEND_A_FILE_OFFSET, offset);
+   TLV_PUT_U64(sctx, BTRFS_SEND_A_SIZE, len);
+   ret = send_cmd(sctx);
+
+tlv_put_failure:
+out:
+   fs_path_free(p);
+   return ret;
+}
+
 static int send_hole(struct send_ctx *sctx, u64 end)
 {
struct fs_path *p = NULL;
u64 offset = sctx-cur_inode_last_extent;
-   u64 len;
+   u64 len = end - offset;
int ret = 0;
 
if (sctx-phase == SEND_PHASE_COMPUTE_DATA_SIZE) {
-   sctx-total_data_size += end - offset;
+   sctx-total_data_size += len;
return 0;
}
 
+   if (sctx-flags  BTRFS_SEND_FLAG_STREAM_V2)
+   return send_fallocate(sctx,
+ BTRFS_SEND_PUNCH_HOLE_FALLOC_FLAGS,
+ offset,
+ len);
+
p = fs_path_alloc();
if (!p)
return -ENOMEM;
@@ -4550,7 +4592,8 @@ static int send_write_or_clone(struct send_ctx *sctx,
len = btrfs_file_extent_num_bytes(path-nodes[0], ei);
}
 
-   if (offset + len  sctx-cur_inode_size)
+   if (offset  sctx-cur_inode_size 
+   offset + len  sctx-cur_inode_size)
len = sctx-cur_inode_size - offset;
if (len == 0) {
ret = 0;
@@ -4567,6 +4610,12 @@ static int send_write_or_clone(struct send_ctx *sctx,
ret = send_clone(sctx, offset, len, clone_root);
} else if (sctx-flags  BTRFS_SEND_FLAG_NO_FILE_DATA) {
ret = send_update_extent(sctx, offset, len);
+   } else if (btrfs_file_extent_disk_bytenr(path-nodes[0], ei) == 0 
+  type != BTRFS_FILE_EXTENT_INLINE 
+  (sctx-flags  BTRFS_SEND_FLAG_STREAM_V2) 
+  offset  sctx-cur_inode_size) {
+   ret = send_fallocate(sctx, BTRFS_SEND_PUNCH_HOLE_FALLOC_FLAGS,
+offset, len);
} else {
while (pos  len) {
l = len - pos;
diff --git a/fs/btrfs/send.h b/fs/btrfs/send.h
index 96f583c..987936c 100644
--- a/fs/btrfs/send.h
+++ b/fs/btrfs/send.h
@@ -148,6 +148,10 @@ enum {
 #define BTRFS_SEND_A_FALLOCATE_FLAG_KEEP_SIZE   (1  0)
 #define BTRFS_SEND_A_FALLOCATE_FLAG_PUNCH_HOLE  (1  1)
 
+#define BTRFS_SEND_PUNCH_HOLE_FALLOC_FLAGS\
+   (BTRFS_SEND_A_FALLOCATE_FLAG_KEEP_SIZE |  \
+BTRFS_SEND_A_FALLOCATE_FLAG_PUNCH_HOLE)
+
 #ifdef __KERNEL__
 long btrfs_ioctl_send(struct file *mnt_file, void __user *arg);
 #endif
-- 
1.9.1

--
To unsubscribe from this list: send the line unsubscribe linux-btrfs in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 4/6 v4] Btrfs: send, use fallocate command to allocate extents

2014-04-20 Thread Filipe David Borba Manana
The send stream version 2 adds the fallocate command, which can be used to
allocate extents for a file or punch holes in a file. Previously we were
ignoring file prealloc extents or treating them as extents filled with 0
bytes and sending a regular write command to the stream.

After this change, together with my previous change titled:

Btrfs: send, use fallocate command to punch holes

an incremental send preserves the hole and data structure of files, which can
be seen via calls to lseek with the whence parameter set to SEEK_DATA or 
SEEK_HOLE,
as the example below shows:

mkfs.btrfs -f /dev/sdc
mount /dev/sdc /mnt
xfs_io -f -c pwrite -S 0x01 -b 30 0 30 /mnt/foo
btrfs subvolume snapshot -r /mnt /mnt/mysnap1

xfs_io -c fpunch 10 5 /mnt/foo
xfs_io -c falloc 10 5 /mnt/foo
xfs_io -c pwrite -S 0xff -b 1000 12 1000 /mnt/foo
xfs_io -c fpunch 25 2 /mnt/foo

# prealloc extents that start beyond the inode's size
xfs_io -c falloc -k 30 100 /mnt/foo
xfs_io -c falloc -k 900 200 /mnt/foo

btrfs subvolume snapshot -r /mnt /mnt/mysnap2

btrfs send /mnt/mysnap1 -f /tmp/1.snap
btrfs send -p /mnt/mysnap1 /mnt/mysnap2 -f /tmp/2.snap

mkfs.btrfs -f /dev/sdd
mount /dev/sdd /mnt2
btrfs receive /mnt2 -f /tmp/1.snap
btrfs receive /mnt2 -f /tmp/2.snap

Before this change the hole/data structure differed between both filesystems:

$ xfs_io -r -c 'seek -r -a 0' /mnt/mysnap2/foo
Whence  Result
DATA0
HOLE102400
DATA118784
HOLE122880
DATA147456
HOLE253952
DATA266240
HOLE30

$ xfs_io -r -c 'seek -r -a 0' /mnt2/mysnap2/foo
Whence  Result
DATA0
HOLE30

After this change the second filesystem (/dev/sdd) ends up with the same 
hole/data
structure as the first filesystem.

Also, after this change, prealloc extents that lie beyond the inode's size (were
allocated with fallocate + keep size flag) are also replicated by an incremental
send. For the above test, it can be observed via fiemap (or btrfs-debug-tree):

$ xfs_io -r -c 'fiemap -l' /mnt2/mysnap2/foo
0: [0..191]: 25096..25287 192 blocks
1: [192..199]: 24672..24679 8 blocks
2: [200..231]: 24584..24615 32 blocks
3: [232..239]: 24680..24687 8 blocks
4: [240..287]: 24616..24663 48 blocks
5: [288..295]: 24688..24695 8 blocks
6: [296..487]: 25392..25583 192 blocks
7: [488..495]: 24696..24703 8 blocks
8: [496..519]: hole 24 blocks
9: [520..527]: 24704..24711 8 blocks
10: [528..583]: 25624..25679 56 blocks
11: [584..591]: 24712..24719 8 blocks
12: [592..2543]: 26192..28143 1952 blocks
13: [2544..17575]: hole 15032 blocks
14: [17576..21487]: 28144..32055 3912 blocks

A test case for xfstests will follow.

Signed-off-by: Filipe David Borba Manana fdman...@gmail.com
---

V2: Added new send ioctl flag BTRFS_SEND_FLAG_SUPPORT_FALLOCATE. A version 2
stream is now only produced if the ioctl caller specifies at least one of
the new send flags (BTRFS_SEND_FLAG_SUPPORT_FALLOCATE or
BTRFS_SEND_FLAG_CALCULATE_DATA_SIZE).
V3: Fixed rebase, removed some duplicate logic on truncate + falloc -k.
V4: Removed BTRFS_SEND_FLAG_SUPPORT_FALLOCATE and added 
BTRFS_SEND_FLAG_STREAM_V2,
added commands for inode set flags and otime.

 fs/btrfs/send.c | 78 +
 1 file changed, 57 insertions(+), 21 deletions(-)

diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c
index e57000b..d6c9466 100644
--- a/fs/btrfs/send.c
+++ b/fs/btrfs/send.c
@@ -113,9 +113,10 @@ struct send_ctx {
 */
u64 cur_ino;
u64 cur_inode_gen;
-   int cur_inode_new;
-   int cur_inode_new_gen;
-   int cur_inode_deleted;
+   u8 cur_inode_new:1;
+   u8 cur_inode_new_gen:1;
+   u8 cur_inode_skip_truncate:1;
+   u8 cur_inode_deleted:1;
u64 cur_inode_size;
u64 cur_inode_mode;
u64 cur_inode_rdev;
@@ -4562,6 +4563,19 @@ tlv_put_failure:
return ret;
 }
 
+static int truncate_before_falloc(struct send_ctx *sctx)
+{
+   int ret = 0;
+
+   if (!sctx-cur_inode_skip_truncate) {
+   ret = send_truncate(sctx, sctx-cur_ino,
+   sctx-cur_inode_gen,
+   sctx-cur_inode_size);
+   sctx-cur_inode_skip_truncate = 1;
+   }
+   return ret;
+}
+
 static int send_write_or_clone(struct send_ctx *sctx,
   struct btrfs_path *path,
   struct btrfs_key *key,
@@ -4601,8 +4615,7 @@ static int send_write_or_clone(struct send_ctx *sctx,
}
 
if (sctx-phase == SEND_PHASE_COMPUTE_DATA_SIZE) {
-   if (offset  sctx-cur_inode_size)
-   sctx-total_data_size += len;
+   sctx

[PATCH 5/6] Btrfs: add missing cleanup on sysfs init failure

2014-04-20 Thread Filipe David Borba Manana
If we failed during initialization of sysfs, we weren't unregistering the
top level btrfs sysfs entry nor the debugfs stuff.
Not unregistering the top level sysfs entry makes future attempts to reload
the btrfs module impossible and the following is reported in dmesg:

[ 2246.451296] WARNING: CPU: 3 PID: 10999 at fs/sysfs/dir.c:486 
sysfs_warn_dup+0x91/0xb0()
[ 2246.451298] sysfs: cannot create duplicate filename '/fs/btrfs'
[ 2246.451298] Modules linked in: btrfs(+) raid6_pq xor bnep rfcomm bluetooth 
binfmt_misc nfsd auth_rpcgss oid_registry nfs_acl nfs lockd fscache sunrpc 
parport_pc parport psmouse serio_raw pcspkr evbug i2c_piix4 e1000 floppy [last 
unloaded: btrfs]
[ 2246.451310] CPU: 3 PID: 10999 Comm: modprobe Tainted: GW
3.13.0-fdm-btrfs-next-24+ #7
[ 2246.451311] Hardware name: Bochs Bochs, BIOS Bochs 01/01/2011
[ 2246.451312]  0009 8800d353fa08 816f1da6 
0410
[ 2246.451314]  8800d353fa58 8800d353fa48 8104a32c 
88020821a290
[ 2246.451316]  88020821a290 88020821a290 8802148f 
8800d353fb80
[ 2246.451318] Call Trace:
[ 2246.451322]  [816f1da6] dump_stack+0x4e/0x68
[ 2246.451324]  [8104a32c] warn_slowpath_common+0x8c/0xc0
[ 2246.451325]  [8104a416] warn_slowpath_fmt+0x46/0x50
[ 2246.451328]  [81367dc5] ? strlcat+0x65/0x90
()

Signed-off-by: Filipe David Borba Manana fdman...@gmail.com
---
 fs/btrfs/sysfs.c | 10 +-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/fs/btrfs/sysfs.c b/fs/btrfs/sysfs.c
index c5eb214..58a1dd1 100644
--- a/fs/btrfs/sysfs.c
+++ b/fs/btrfs/sysfs.c
@@ -672,10 +672,18 @@ int btrfs_init_sysfs(void)
 
ret = btrfs_init_debugfs();
if (ret)
-   return ret;
+   goto out1;
 
init_feature_attrs();
ret = sysfs_create_group(btrfs_kset-kobj, btrfs_feature_attr_group);
+   if (ret)
+   goto out2;
+
+   return 0;
+out2:
+   debugfs_remove_recursive(btrfs_debugfs_root_dentry);
+out1:
+   kset_unregister(btrfs_kset);
 
return ret;
 }
-- 
1.9.1

--
To unsubscribe from this list: send the line unsubscribe linux-btrfs in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 6/6] Btrfs: add send_stream_version attribute to sysfs

2014-04-20 Thread Filipe David Borba Manana
So that applications can find out what's the highest send stream
version supported/implemented by the running kernel:

$ cat /sys/fs/btrfs/send_stream_version
2

Signed-off-by: Filipe David Borba Manana fdman...@gmail.com
---
 fs/btrfs/send.h  |  1 +
 fs/btrfs/sysfs.c | 36 
 2 files changed, 37 insertions(+)

diff --git a/fs/btrfs/send.h b/fs/btrfs/send.h
index 987936c..047fd6d 100644
--- a/fs/btrfs/send.h
+++ b/fs/btrfs/send.h
@@ -22,6 +22,7 @@
 #define BTRFS_SEND_STREAM_MAGIC btrfs-stream
 #define BTRFS_SEND_STREAM_VERSION_1 1
 #define BTRFS_SEND_STREAM_VERSION_2 2
+#define BTRFS_SEND_STREAM_VERSION_LATEST BTRFS_SEND_STREAM_VERSION_2
 
 #define BTRFS_SEND_BUF_SIZE (1024 * 64)
 #define BTRFS_SEND_READ_SIZE (1024 * 48)
diff --git a/fs/btrfs/sysfs.c b/fs/btrfs/sysfs.c
index 58a1dd1..2f8fff6 100644
--- a/fs/btrfs/sysfs.c
+++ b/fs/btrfs/sysfs.c
@@ -31,6 +31,7 @@
 #include transaction.h
 #include sysfs.h
 #include volumes.h
+#include send.h
 
 static inline struct btrfs_fs_info *to_fs_info(struct kobject *kobj);
 
@@ -662,6 +663,36 @@ static int btrfs_init_debugfs(void)
return 0;
 }
 
+static ssize_t send_stream_version_show(struct kobject *kobj,
+   struct kobj_attribute *a,
+   char *buf)
+{
+   return snprintf(buf, PAGE_SIZE, %d\n,
+   BTRFS_SEND_STREAM_VERSION_LATEST);
+}
+
+BTRFS_ATTR(send_stream_version, 0444, send_stream_version_show);
+
+static const struct attribute *btrfs_root_attrs[] = {
+   BTRFS_ATTR_PTR(send_stream_version),
+   NULL
+};
+
+static int add_root_attrs(struct kobject *kobj)
+{
+   int error = 0;
+   int i;
+
+   for (i = 0; btrfs_root_attrs[i]; i++) {
+   const struct attribute *a = btrfs_root_attrs[i];
+
+   error = sysfs_add_file_to_group(kobj-parent, a, kobj-name);
+   if (error)
+   break;
+   }
+   return error;
+}
+
 int btrfs_init_sysfs(void)
 {
int ret;
@@ -678,8 +709,13 @@ int btrfs_init_sysfs(void)
ret = sysfs_create_group(btrfs_kset-kobj, btrfs_feature_attr_group);
if (ret)
goto out2;
+   ret = add_root_attrs(btrfs_kset-kobj);
+   if (ret)
+   goto out3;
 
return 0;
+out3:
+   sysfs_remove_group(btrfs_kset-kobj, btrfs_feature_attr_group);
 out2:
debugfs_remove_recursive(btrfs_debugfs_root_dentry);
 out1:
-- 
1.9.1

--
To unsubscribe from this list: send the line unsubscribe linux-btrfs in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 1/4 v3] Btrfs-progs: send, bump stream version

2014-04-20 Thread Filipe David Borba Manana
This increases the send stream version from version 1 to version 2, adding
new commands:

1) total data size - used to tell the receiver how much file data the stream
   will add or update;

2) fallocate - used to pre-allocate space for files and to punch holes in files;

3) inode set flags;

4) set inode otime.

This is preparation work for subsequent changes that implement the new features.

This doesn't break compatibility with older kernels or clients. In order to get
a version 2 send stream, new flags must be passed to the send ioctl.

Signed-off-by: Filipe David Borba Manana fdman...@gmail.com
---

V2: Added new send ioctl flag BTRFS_SEND_FLAG_SUPPORT_FALLOCATE. A version 2
stream is now only produced if the ioctl caller specifies at least one of
the new send flags (BTRFS_SEND_FLAG_SUPPORT_FALLOCATE or
BTRFS_SEND_FLAG_CALCULATE_DATA_SIZE).
V3: Removed BTRFS_SEND_FLAG_SUPPORT_FALLOCATE flag and -a command line option
for btrfs-send. Both were replaced with BTRFS_SEND_FLAG_STREAM_V2 and
--stream-version=version_number respectively. Added commands for inode
set flags and otime too.

 Documentation/btrfs-send.txt |  3 +++
 cmds-send.c  | 57 ++--
 ioctl.h  | 15 
 send-stream.c|  2 +-
 send.h   | 23 +-
 5 files changed, 85 insertions(+), 15 deletions(-)

diff --git a/Documentation/btrfs-send.txt b/Documentation/btrfs-send.txt
index 18a98fa..067fc27 100644
--- a/Documentation/btrfs-send.txt
+++ b/Documentation/btrfs-send.txt
@@ -40,6 +40,9 @@ Use this snapshot as a clone source for an incremental send 
(multiple allowed).
 -f outfile::
 Output is normally written to stdout. To write to a file, use this option.
 An alternative would be to use pipes.
+--stream-version version::
+Ask the kernel to produce a specific send stream version. More recent stream 
versions provide
+new features and better performance. Default value is 1.
 
 EXIT STATUS
 ---
diff --git a/cmds-send.c b/cmds-send.c
index 1cd457d..bd575f8 100644
--- a/cmds-send.c
+++ b/cmds-send.c
@@ -32,6 +32,7 @@
 #include libgen.h
 #include mntent.h
 #include assert.h
+#include getopt.h
 
 #include uuid/uuid.h
 
@@ -45,6 +46,7 @@
 #include send-utils.h
 
 static int g_verbose = 0;
+static int g_stream_version = BTRFS_SEND_STREAM_VERSION_1;
 
 struct btrfs_send {
int send_fd;
@@ -281,6 +283,8 @@ static int do_send(struct btrfs_send *send, u64 
parent_root_id,
io_send.flags |= BTRFS_SEND_FLAG_OMIT_STREAM_HEADER;
if (!is_last_subvol)
io_send.flags |= BTRFS_SEND_FLAG_OMIT_END_CMD;
+   if (g_stream_version == BTRFS_SEND_STREAM_VERSION_2)
+   io_send.flags |= BTRFS_SEND_FLAG_STREAM_V2;
ret = ioctl(subvol_fd, BTRFS_IOC_SEND, io_send);
if (ret) {
ret = -errno;
@@ -406,6 +410,11 @@ out:
return ret;
 }
 
+static const struct option long_options[] = {
+   { stream-version, 1, NULL, 'V' },
+   { NULL, 0, NULL, 0 }
+};
+
 int cmd_send(int argc, char **argv)
 {
char *subvol = NULL;
@@ -424,7 +433,8 @@ int cmd_send(int argc, char **argv)
memset(send, 0, sizeof(send));
send.dump_fd = fileno(stdout);
 
-   while ((c = getopt(argc, argv, vec:f:i:p:)) != -1) {
+   while ((c = getopt_long(argc, argv, vec:f:i:p:,
+   long_options, NULL)) != -1) {
switch (c) {
case 'v':
g_verbose++;
@@ -511,6 +521,24 @@ int cmd_send(int argc, char **argv)
ERROR: -i was removed, use -c instead\n);
ret = 1;
goto out;
+   case 'V':
+   if (sscanf(optarg, %d, g_stream_version) != 1) {
+   fprintf(stderr,
+   ERROR: invalid value for stream 
version: %s\n,
+   optarg);
+   ret = 1;
+   goto out;
+   }
+   if (g_stream_version = 0 ||
+   g_stream_version  BTRFS_SEND_STREAM_VERSION_MAX) {
+   fprintf(stderr,
+   ERROR: unsupported stream version %d, 
minimum: 1, maximum: %d\n,
+   g_stream_version,
+   BTRFS_SEND_STREAM_VERSION_MAX);
+   ret = 1;
+   goto out;
+   }
+   break;
case '?':
default:
fprintf(stderr, ERROR: send args invalid.\n);
@@ -673,7 +701,7 @@ out:
 }
 
 const char * const cmd_send_usage[] = {
-   btrfs send [-ve] [-p parent] [-c clone-src] [-f outfile] 
subvol [subvol

[PATCH 2/4 v4] Btrfs-progs: send, implement total data size callback and progress report

2014-04-20 Thread Filipe David Borba Manana
This is a followup to the kernel patch titled:

Btrfs: send, implement total data size command to allow for progress 
estimation

This makes the btrfs send and receive commands aware of the new send flag,
named BTRFS_SEND_C_TOTAL_DATA_SIZE, which tells us the amount of file data
that is new between the parent and send snapshots/roots. As this command
immediately follows the commands to start a snapshot/subvolume, it can be
used to report and compute progress, by keeping a counter that is incremented
with the data length of each write, clone and fallocate command that is received
from the stream.

Example:

$ btrfs send -s --stream-version 2 /mnt/sdd/snap_base | btrfs receive 
/mnt/sdc
At subvol /mnt/sdd/snap_base
At subvol snap_base
About to receive 9212392667 bytes
Subvolume /mnt/sdc//snap_base, 4059722426 / 9212392667 bytes received, 
44.07%, 40.32MB/s

$ btrfs send -s --stream-version 2 -p /mnt/sdd/snap_base /mnt/sdd/snap_incr 
| btrfs receive /mnt/sdc
At subvol /mnt/sdd/snap_incr
At subvol snap_incr
About to receive 9571342213 bytes
Subvolume /mnt/sdc//snap_incr, 6557345221 / 9571342213 bytes received, 
68.51%, 51.04MB/s

At the moment progress is only reported by btrfs-receive, but it is possible 
and simple
to do it for btrfs-send too, so that we can get progress report when not piping 
btrfs-send
output to btrfs-receive (directly to a file).

Signed-off-by: Filipe David Borba Manana fdman...@gmail.com
---

V2: Added new send ioctl flag BTRFS_SEND_FLAG_SUPPORT_FALLOCATE. A version 2
stream is now only produced if the ioctl caller specifies at least one of
the new send flags (BTRFS_SEND_FLAG_SUPPORT_FALLOCATE or
BTRFS_SEND_FLAG_CALCULATE_DATA_SIZE).
V3: Renamed option -o to -s, removed some duplicated code (progress reset).
V4: Removed BTRFS_SEND_FLAG_SUPPORT_FALLOCATE flag and -a command line option
for btrfs-send. Both were replaced with BTRFS_SEND_FLAG_STREAM_V2 and
--stream-version=version_number respectively. Added commands for inode
set flags and otime too.

 Documentation/btrfs-send.txt |  4 ++
 cmds-receive.c   | 91 
 cmds-send.c  | 23 ++-
 send-stream.c|  4 ++
 send-stream.h|  1 +
 5 files changed, 121 insertions(+), 2 deletions(-)

diff --git a/Documentation/btrfs-send.txt b/Documentation/btrfs-send.txt
index 067fc27..1b18d32 100644
--- a/Documentation/btrfs-send.txt
+++ b/Documentation/btrfs-send.txt
@@ -43,6 +43,10 @@ An alternative would be to use pipes.
 --stream-version version::
 Ask the kernel to produce a specific send stream version. More recent stream 
versions provide
 new features and better performance. Default value is 1.
+-s::
+Obtain the total data size for each subvolume or snapshot to send. This 
demands additional
+processing (mostly IO bound) but is useful for the receive command to report 
progress. This
+option requires send stream version 2 or higher.
 
 EXIT STATUS
 ---
diff --git a/cmds-receive.c b/cmds-receive.c
index d6cd3da..bd5255c 100644
--- a/cmds-receive.c
+++ b/cmds-receive.c
@@ -32,6 +32,7 @@
 #include ftw.h
 #include wait.h
 #include assert.h
+#include time.h
 
 #include sys/stat.h
 #include sys/types.h
@@ -71,6 +72,14 @@ struct btrfs_receive
struct subvol_uuid_search sus;
 
int honor_end_cmd;
+
+   /* For the subvolume/snapshot we're currently receiving. */
+   u64 total_data_size;
+   u64 bytes_received;
+   time_t last_progress_update;
+   u64 bytes_received_last_update;
+   float progress;
+   const char *target;
 };
 
 static int finish_subvol(struct btrfs_receive *r)
@@ -143,6 +152,16 @@ out:
return ret;
 }
 
+static void reset_progress(struct btrfs_receive *r, const char *dest)
+{
+   r-total_data_size = 0;
+   r-bytes_received = 0;
+   r-progress = 0.0;
+   r-last_progress_update = 0;
+   r-bytes_received_last_update = 0;
+   r-target = dest;
+}
+
 static int process_subvol(const char *path, const u8 *uuid, u64 ctransid,
  void *user)
 {
@@ -156,6 +175,7 @@ static int process_subvol(const char *path, const u8 *uuid, 
u64 ctransid,
goto out;
 
r-cur_subvol = calloc(1, sizeof(*r-cur_subvol));
+   reset_progress(r, Subvolume);
 
if (strlen(r-dest_dir_path) == 0)
r-cur_subvol-path = strdup(path);
@@ -205,6 +225,7 @@ static int process_snapshot(const char *path, const u8 
*uuid, u64 ctransid,
goto out;
 
r-cur_subvol = calloc(1, sizeof(*r-cur_subvol));
+   reset_progress(r, Snapshot);
 
if (strlen(r-dest_dir_path) == 0)
r-cur_subvol-path = strdup(path);
@@ -287,6 +308,73 @@ out:
return ret;
 }
 
+static int process_total_data_size(u64 size, void *user)
+{
+   struct btrfs_receive *r = user;
+
+   r-total_data_size = size;
+   fprintf(stdout, About

[PATCH 3/4 v4] Btrfs-progs: send, implement fallocate command callback

2014-04-20 Thread Filipe David Borba Manana
The fallocate send stream command, added in stream version 2, is used to
pre-allocate space for files and punch file holes. This change implements
the callback for that new command, using the fallocate function from the
standard C library to carry out the specified action (allocate file space
or punch a file hole).

Signed-off-by: Filipe David Borba Manana fdman...@gmail.com
---

V2: Use the new send ioctl flag BTRFS_SEND_FLAG_SUPPORT_FALLOCATE if the user
asks for it (-a command line option), which will make the kernel generate
a version 2 send stream, so that old clients aren't affected.
V3: Rebased on new patchset (new version of patch 2/4).
V4: Removed BTRFS_SEND_FLAG_SUPPORT_FALLOCATE flag and -a command line option
for btrfs-send. Both were replaced with BTRFS_SEND_FLAG_STREAM_V2 and
--stream-version=version_number respectively. Added commands for inode
set flags and otime too.

 cmds-receive.c | 38 ++
 send-stream.c  | 13 +
 send-stream.h  |  2 ++
 3 files changed, 53 insertions(+)

diff --git a/cmds-receive.c b/cmds-receive.c
index bd5255c..5e96423 100644
--- a/cmds-receive.c
+++ b/cmds-receive.c
@@ -41,6 +41,7 @@
 #include sys/types.h
 #include sys/xattr.h
 #include uuid/uuid.h
+#include linux/falloc.h
 
 #include ctree.h
 #include ioctl.h
@@ -887,6 +888,42 @@ out:
return ret;
 }
 
+static int process_fallocate(const char *path, u32 flags, u64 offset,
+u64 len, void *user)
+{
+   struct btrfs_receive *r = user;
+   char *full_path = path_cat(r-full_subvol_path, path);
+   int mode = 0;
+   int ret;
+
+   if (flags  BTRFS_SEND_A_FALLOCATE_FLAG_KEEP_SIZE)
+   mode |= FALLOC_FL_KEEP_SIZE;
+   if (flags  BTRFS_SEND_A_FALLOCATE_FLAG_PUNCH_HOLE)
+   mode |= FALLOC_FL_PUNCH_HOLE;
+
+   if (g_verbose = 2)
+   fprintf(stderr,
+   fallocate %s - flags %u, offset %llu, len %llu\n,
+   path, flags, offset, len);
+
+   ret = open_inode_for_write(r, full_path);
+   if (ret  0)
+   goto out;
+
+   ret = fallocate(r-write_fd, mode, offset, len);
+   if (ret) {
+   ret = -errno;
+   fprintf(stderr,
+   ERROR: fallocate against %s failed. %s\n,
+   path, strerror(-ret));
+   goto out;
+   }
+   update_progress(r, len);
+
+out:
+   free(full_path);
+   return ret;
+}
 
 static struct btrfs_send_ops send_ops = {
.subvol = process_subvol,
@@ -910,6 +947,7 @@ static struct btrfs_send_ops send_ops = {
.chown = process_chown,
.utimes = process_utimes,
.total_data_size = process_total_data_size,
+   .fallocate = process_fallocate,
 };
 
 static int do_receive(struct btrfs_receive *r, const char *tomnt, int r_fd)
diff --git a/send-stream.c b/send-stream.c
index e1bd4ce..812639f 100644
--- a/send-stream.c
+++ b/send-stream.c
@@ -425,6 +425,19 @@ static int read_and_process_cmd(struct btrfs_send_stream 
*s)
TLV_GET_U64(s, BTRFS_SEND_A_SIZE, tmp);
ret = s-ops-total_data_size(tmp, s-user);
break;
+   case BTRFS_SEND_C_FALLOCATE:
+   {
+   u32 flags;
+   u64 len;
+
+   TLV_GET_STRING(s, BTRFS_SEND_A_PATH, path);
+   TLV_GET_U32(s, BTRFS_SEND_A_FALLOCATE_FLAGS, flags);
+   TLV_GET_U64(s, BTRFS_SEND_A_FILE_OFFSET, offset);
+   TLV_GET_U64(s, BTRFS_SEND_A_SIZE, len);
+   ret = s-ops-fallocate(path, flags, offset, len,
+   s-user);
+   }
+   break;
case BTRFS_SEND_C_END:
ret = 1;
break;
diff --git a/send-stream.h b/send-stream.h
index 3a653a9..479e40c 100644
--- a/send-stream.h
+++ b/send-stream.h
@@ -55,6 +55,8 @@ struct btrfs_send_ops {
  void *user);
int (*update_extent)(const char *path, u64 offset, u64 len, void *user);
int (*total_data_size)(u64 size, void *user);
+   int (*fallocate)(const char *path, u32 flags, u64 offset,
+u64 len, void *user);
 };
 
 int btrfs_read_and_process_send_stream(int fd,
-- 
1.9.1

--
To unsubscribe from this list: send the line unsubscribe linux-btrfs in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v4] xfstests: btrfs, test send's ability to punch holes and prealloc extents

2014-04-20 Thread Filipe David Borba Manana
This test verifies that after an incremental btrfs send the
replicated file has the same exact hole and data structure as in
the origin filesystem. This didn't use to be the case before the
send stream version 2 - holes were sent as write operations of 0
valued bytes instead of punching holes with the fallocate system
call, and pre-allocated extents were sent as well as write
operations of 0 valued bytes instead of instructions for the
receiver to use the fallocate system call.

It also checks that prealloc extents that lie beyond the file's
size are replicated by an incremental send.

Signed-off-by: Filipe David Borba Manana fdman...@gmail.com
---

V2: Addressed Dave's comments, and updated btrfs send invocation, by specifying
the new command line option (-a) that enables use of fallocate - added
function _require_btrfs_send_fallocate_flag() to skip the test when an old
version of btrfs-progs is found.
V3: Corrected use of fiemap with _filter_fiemap. Was passing -l instead of -v
to fiemap, which resulted in output consisting only of a single line related
to a hole instead of all holes and data extents (and I wanted to verify the
falloc -k extents were preserved after the btrfs send).
V4: Updated invocation of btrfs send, as the flag -a was removed from btrfs-send
in favour of --stream-version=version

 common/rc   |   9 
 tests/btrfs/047 | 122 
 tests/btrfs/047.out |  35 +++
 tests/btrfs/group   |   1 +
 4 files changed, 167 insertions(+)
 create mode 100755 tests/btrfs/047
 create mode 100644 tests/btrfs/047.out

diff --git a/common/rc b/common/rc
index acf419b..a9d1c4c 100644
--- a/common/rc
+++ b/common/rc
@@ -2262,6 +2262,15 @@ _run_btrfs_util_prog()
run_check $BTRFS_UTIL_PROG $*
 }
 
+_require_btrfs_send_stream_version()
+{
+   $BTRFS_UTIL_PROG send 21 | \
+   grep '^[ \t]*\-\-stream\-version version'  /dev/null 21
+   if [ $? -ne 0 ]; then
+   _notrun Missing btrfs-progs send --stream-version command line 
option, skipped this test
+   fi
+}
+
 init_rc()
 {
if [ $iam == new ]
diff --git a/tests/btrfs/047 b/tests/btrfs/047
new file mode 100755
index 000..cc1936d
--- /dev/null
+++ b/tests/btrfs/047
@@ -0,0 +1,122 @@
+#! /bin/bash
+# FS QA Test No. btrfs/047
+#
+# Verify that after an incremental btrfs send the replicated file has
+# the same exact hole and data structure as in the origin filesystem.
+# This didn't use to be the case before the send stream version 2 -
+# holes were sent as write operations of 0 valued bytes instead of punching
+# holes with the fallocate system call, and pre-allocated extents were sent
+# as well as write operations of 0 valued bytes instead of instructions for
+# the receiver to use the fallocate system call. Also check that prealloc
+# extents that lie beyond the file's size are replicated by an incremental
+# send.
+#
+# More specifically, this structure preserving guarantee was added by the
+# following linux kernel commits:
+#
+#Btrfs: send, use fallocate command to punch holes
+#Btrfs: send, use fallocate command to allocate extents
+#
+#---
+# Copyright (c) 2014 Filipe Manana.  All Rights Reserved.
+#
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License as
+# published by the Free Software Foundation.
+#
+# This program is distributed in the hope that it would be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write the Free Software Foundation,
+# Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+#---
+#
+
+seq=`basename $0`
+seqres=$RESULT_DIR/$seq
+echo QA output created by $seq
+
+tmp=/tmp/$$
+status=1   # failure is the default!
+trap _cleanup; exit \$status 0 1 2 3 15
+
+_cleanup()
+{
+rm -fr $send_files_dir
+rm -fr $tmp
+}
+
+# get standard environment, filters and checks
+. ./common/rc
+. ./common/filter
+. ./common/punch
+
+# real QA test starts here
+_supported_fs btrfs
+_supported_os Linux
+_require_scratch
+_require_fssum
+_require_xfs_io_fiemap
+_require_btrfs_send_stream_version
+_need_to_be_root
+
+send_files_dir=$TEST_DIR/btrfs-test-$seq
+
+rm -f $seqres.full
+rm -fr $send_files_dir
+mkdir $send_files_dir
+
+_scratch_mkfs /dev/null 21
+_scratch_mount
+
+$XFS_IO_PROG -f -c pwrite -S 0x01 -b 30 0 30 $SCRATCH_MNT/foo \
+   | _filter_xfs_io
+
+_run_btrfs_util_prog subvolume snapshot -r $SCRATCH_MNT $SCRATCH_MNT/mysnap1
+
+$XFS_IO_PROG -c fpunch 10 5 $SCRATCH_MNT/foo
+$XFS_IO_PROG -c

[PATCH 6/6 v2] Btrfs: add send_stream_version attribute to sysfs

2014-04-20 Thread Filipe David Borba Manana
So that applications can find out what's the highest send stream
version supported/implemented by the running kernel:

$ cat /sys/fs/btrfs/send/stream_version
2

Signed-off-by: Filipe David Borba Manana fdman...@gmail.com
---

V2: Renamed /sys/fs/btrfs/send_stream_version to 
/sys/fs/btrfs/send/stream_version,
as in the future it might be useful to add other sysfs attributes related to
send (other ro information or tunables like internal buffer sizes, etc).

 fs/btrfs/send.h  |  1 +
 fs/btrfs/sysfs.c | 27 +++
 2 files changed, 28 insertions(+)

diff --git a/fs/btrfs/send.h b/fs/btrfs/send.h
index 987936c..047fd6d 100644
--- a/fs/btrfs/send.h
+++ b/fs/btrfs/send.h
@@ -22,6 +22,7 @@
 #define BTRFS_SEND_STREAM_MAGIC btrfs-stream
 #define BTRFS_SEND_STREAM_VERSION_1 1
 #define BTRFS_SEND_STREAM_VERSION_2 2
+#define BTRFS_SEND_STREAM_VERSION_LATEST BTRFS_SEND_STREAM_VERSION_2
 
 #define BTRFS_SEND_BUF_SIZE (1024 * 64)
 #define BTRFS_SEND_READ_SIZE (1024 * 48)
diff --git a/fs/btrfs/sysfs.c b/fs/btrfs/sysfs.c
index 58a1dd1..d93c0b5 100644
--- a/fs/btrfs/sysfs.c
+++ b/fs/btrfs/sysfs.c
@@ -31,6 +31,7 @@
 #include transaction.h
 #include sysfs.h
 #include volumes.h
+#include send.h
 
 static inline struct btrfs_fs_info *to_fs_info(struct kobject *kobj);
 
@@ -662,6 +663,26 @@ static int btrfs_init_debugfs(void)
return 0;
 }
 
+static ssize_t send_stream_version_show(struct kobject *kobj,
+   struct kobj_attribute *a,
+   char *buf)
+{
+   return snprintf(buf, PAGE_SIZE, %d\n,
+   BTRFS_SEND_STREAM_VERSION_LATEST);
+}
+
+BTRFS_ATTR(stream_version, 0444, send_stream_version_show);
+
+static struct attribute *btrfs_send_attrs[] = {
+   BTRFS_ATTR_PTR(stream_version),
+   NULL
+};
+
+static const struct attribute_group btrfs_send_attr_group = {
+   .name = send,
+   .attrs = btrfs_send_attrs,
+};
+
 int btrfs_init_sysfs(void)
 {
int ret;
@@ -678,8 +699,13 @@ int btrfs_init_sysfs(void)
ret = sysfs_create_group(btrfs_kset-kobj, btrfs_feature_attr_group);
if (ret)
goto out2;
+   ret = sysfs_create_group(btrfs_kset-kobj, btrfs_send_attr_group);
+   if (ret)
+   goto out3;
 
return 0;
+out3:
+   sysfs_remove_group(btrfs_kset-kobj, btrfs_feature_attr_group);
 out2:
debugfs_remove_recursive(btrfs_debugfs_root_dentry);
 out1:
@@ -691,6 +717,7 @@ out1:
 void btrfs_exit_sysfs(void)
 {
sysfs_remove_group(btrfs_kset-kobj, btrfs_feature_attr_group);
+   sysfs_remove_group(btrfs_kset-kobj, btrfs_send_attr_group);
kset_unregister(btrfs_kset);
debugfs_remove_recursive(btrfs_debugfs_root_dentry);
 }
-- 
1.9.1

--
To unsubscribe from this list: send the line unsubscribe linux-btrfs in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH] xfstests: btrfs/004, fix filefrag filter for files with 1 block only

2014-04-17 Thread Filipe David Borba Manana
If the file consists of a single block, then filefrag mentions
'1 block of ...', and the filter expected 'blocks of ...'.

Example:

$ echo qwerty > foobar
$ filefrag -v foobar
Filesystem type is: ef53
File size of foobar is 7 (1 block of 4096 bytes)
 ext: logical_offset:physical_offset: length:   expected: flags:
   0:0..   0:  0.. 0:  1: 
unknown,delalloc,eof
foobar: 1 extent found

Signed-off-by: Filipe David Borba Manana fdman...@gmail.com
---
 tests/btrfs/004 | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/btrfs/004 b/tests/btrfs/004
index 211d8bc..670e1c2 100755
--- a/tests/btrfs/004
+++ b/tests/btrfs/004
@@ -58,7 +58,7 @@ _require_command /usr/sbin/filefrag
 rm -f $seqres.full
 
 FILEFRAG_FILTER='
-   if (/blocks of (\d+) bytes/) {
+   if (/blocks? of (\d+) bytes/) {
$blocksize = $1;
next
}
-- 
1.9.1

--
To unsubscribe from this list: send the line unsubscribe linux-btrfs in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH] xfstests: btrfs/004, fix failure with inlined file extents

2014-04-17 Thread Filipe David Borba Manana
Files that consist of an inline extent, have the corresponding
data in the filesystem btree and not on a dedicated extent. For
such extents filefrag (fiemap) will report a physical location
of 0 for that extent and set the 'inline' flag.

The btrfs inspect-internal logical-resolve command will cause a
lookup in the extent tree for the extent address we give it as
an argument, which fails with errno ENOENT if it is 0.

This error didn't always happen, as the test uses fsstress to
generate a random filesystem, which needed to generate at least
one file that could be inlined (content less than 4018 bytes).

Example, taken from results/btrfs/004.full:

   # filefrag -v /home/fdmanana/btrfs-tests/scratch_1/snap1/p0/de/d1b/dcb/fb1
   Filesystem type is: 9123683e
   File size of /home/fdmanana/btrfs-tests/scratch_1/snap1/p0/de/d1b/dcb/fb1 is 
3860 (1 block of 4096 bytes)
ext: logical_offset:physical_offset: length:   expected: flags:
  0:0..4095:  0..  4095:   4096: 
not_aligned,inline,eof
  1:  280.. 344:  35190.. 35254: 65:  1: eof
   /home/fdmanana/btrfs-tests/scratch_1/snap1/p0/de/d1b/dcb/fb1: 2 extents found
   after filter: 0#0#0 0#0#0
   # stat -c %i /home/fdmanana/btrfs-tests/scratch_1/snap1/p0/de/d1b/dcb/fb1
   403
   # /home/fdmanana/git/hub/btrfs-progs/btrfs inspect-internal logical-resolve 
-P 0 /home/fdmanana/btrfs-tests/scratch_1
   ioctl ret=-1, error: No such file or directory

Signed-off-by: Filipe David Borba Manana fdman...@gmail.com
---
 tests/btrfs/004 | 34 +++---
 1 file changed, 23 insertions(+), 11 deletions(-)

diff --git a/tests/btrfs/004 b/tests/btrfs/004
index 670e1c2..1d5b816 100755
--- a/tests/btrfs/004
+++ b/tests/btrfs/004
@@ -65,9 +65,11 @@ FILEFRAG_FILTER='
($ext, $logical, $physical, $length) =
(/^\s*(\d+):\s+(\d+)..\s+\d+:\s+(\d+)..\s+\d+:\s+(\d+):/)
or next;
+   ($flags) = /.*:\s*(\S*)$/;
print $physical * $blocksize, #,
  $length * $blocksize, #,
- $logical * $blocksize,  '
+ $logical * $blocksize, #,
+ $flags,  '
 
 # this makes filefrag output script readable by using a perl helper.
 # output is one extent per line, with three numbers separated by '#'
@@ -230,16 +232,26 @@ workout()
continue;
fi
for i in $extents; do
-   physical=$i
-   length=$i
-   logical=$i
-   physical=`echo $physical | sed -e 's/#.*//'`
-   length=`echo $length | sed -e 's/[^#]+#//'`
-   length=`echo $length | sed -e 's/#.*//'`
-   logical=`echo $logical | sed -e 's/.*#//'`
-   _btrfs_inspect_check $file $physical $length $logical \
-   $snap_name
-   ret=$?
+   physical=`echo $i | cut -d '#' -f 1`
+   length=`echo $i | cut -d '#' -f 2`
+   logical=`echo $i | cut -d '#' -f 3`
+   flags=`echo $i | cut -d '#' -f 4`
+   # Skip inline extents, otherwise btrfs inspect-internal
+   # logical-resolve will fail (with errno ENOENT), as it
+   # can't find an extent with a start address of 0 in the
+   # extent tree.
+   if [ $physical -eq 0 ]; then
+   echo $flags | grep -E '(^|,)inline(,|$)' \
+/dev/null
+   ret=$?
+   if [ $ret -ne 0 ]; then
+   echo Unexpected physical address 0 for 
non-inline extent, file $file, flags $flags
+   fi
+   else
+   _btrfs_inspect_check $file $physical $length \
+   $logical $snap_name
+   ret=$?
+   fi
if [ $ret -ne 0 ]; then
errcnt=`expr $errcnt + 1`
fi
-- 
1.9.1

--
To unsubscribe from this list: send the line unsubscribe linux-btrfs in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 1/4 v2] Btrfs: send, bump stream version

2014-04-16 Thread Filipe David Borba Manana
This increases the send stream version from version 1 to version 2, adding
2 new commands:

1) total data size - used to tell the receiver how much file data the stream
   will add or update;

2) fallocate - used to pre-allocate space for files and to punch holes in files.

This is preparation work for subsequent changes that implement the new features
(computing total data size and use fallocate for better performance).

A version 2 stream is only produced if the send ioctl caller passes in one of 
the
new flags (BTRFS_SEND_FLAG_CALCULATE_DATA_SIZE | 
BTRFS_SEND_FLAG_SUPPORT_FALLOCATE),
meaning old clients are unaffected.

Signed-off-by: Filipe David Borba Manana fdman...@gmail.com
---

V2: A v2 stream is now only produced if the send ioctl caller passes in one of
the new flags (BTRFS_SEND_FLAG_CALCULATE_DATA_SIZE | 
BTRFS_SEND_FLAG_SUPPORT_FALLOCATE)
to avoid breaking old clients.

 fs/btrfs/send.c|  6 +-
 fs/btrfs/send.h| 14 +-
 include/uapi/linux/btrfs.h | 24 +++-
 3 files changed, 41 insertions(+), 3 deletions(-)

diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c
index 289e9f3..53712aa 100644
--- a/fs/btrfs/send.c
+++ b/fs/btrfs/send.c
@@ -632,7 +632,11 @@ static int send_header(struct send_ctx *sctx)
struct btrfs_stream_header hdr;
 
strcpy(hdr.magic, BTRFS_SEND_STREAM_MAGIC);
-   hdr.version = cpu_to_le32(BTRFS_SEND_STREAM_VERSION);
+   if (sctx-flags  (BTRFS_SEND_FLAG_CALCULATE_DATA_SIZE |
+  BTRFS_SEND_FLAG_SUPPORT_FALLOCATE))
+   hdr.version = cpu_to_le32(BTRFS_SEND_STREAM_VERSION_2);
+   else
+   hdr.version = cpu_to_le32(BTRFS_SEND_STREAM_VERSION_1);
 
return write_buf(sctx-send_filp, hdr, sizeof(hdr),
sctx-send_off);
diff --git a/fs/btrfs/send.h b/fs/btrfs/send.h
index 48d425a..367030d 100644
--- a/fs/btrfs/send.h
+++ b/fs/btrfs/send.h
@@ -20,7 +20,8 @@
 #include ctree.h
 
 #define BTRFS_SEND_STREAM_MAGIC btrfs-stream
-#define BTRFS_SEND_STREAM_VERSION 1
+#define BTRFS_SEND_STREAM_VERSION_1 1
+#define BTRFS_SEND_STREAM_VERSION_2 2
 
 #define BTRFS_SEND_BUF_SIZE (1024 * 64)
 #define BTRFS_SEND_READ_SIZE (1024 * 48)
@@ -87,6 +88,11 @@ enum btrfs_send_cmd {
 
BTRFS_SEND_C_END,
BTRFS_SEND_C_UPDATE_EXTENT,
+
+   /* added in stream version 2 */
+   BTRFS_SEND_C_TOTAL_DATA_SIZE,
+   BTRFS_SEND_C_FALLOCATE,
+
__BTRFS_SEND_C_MAX,
 };
 #define BTRFS_SEND_C_MAX (__BTRFS_SEND_C_MAX - 1)
@@ -125,10 +131,16 @@ enum {
BTRFS_SEND_A_CLONE_OFFSET,
BTRFS_SEND_A_CLONE_LEN,
 
+   /* added in stream version 2 */
+   BTRFS_SEND_A_FALLOCATE_FLAGS,
+
__BTRFS_SEND_A_MAX,
 };
 #define BTRFS_SEND_A_MAX (__BTRFS_SEND_A_MAX - 1)
 
+#define BTRFS_SEND_A_FALLOCATE_FLAG_KEEP_SIZE   (1  0)
+#define BTRFS_SEND_A_FALLOCATE_FLAG_PUNCH_HOLE  (1  1)
+
 #ifdef __KERNEL__
 long btrfs_ioctl_send(struct file *mnt_file, void __user *arg);
 #endif
diff --git a/include/uapi/linux/btrfs.h b/include/uapi/linux/btrfs.h
index b4d6909..6611406 100644
--- a/include/uapi/linux/btrfs.h
+++ b/include/uapi/linux/btrfs.h
@@ -464,10 +464,32 @@ struct btrfs_ioctl_received_subvol_args {
  */
 #define BTRFS_SEND_FLAG_OMIT_END_CMD   0x4
 
+/*
+ * Calculate the amount (in bytes) of new file data between the send and
+ * parent snapshots, or in case of a full send, the total amount of file data
+ * we will send.
+ * This corresponds to the sum of the data lengths of each write, clone and
+ * fallocate commands that are sent through the send stream. The receiving end
+ * can use this information to compute progress.
+ *
+ * Added in send stream version 2.
+ */
+#define BTRFS_SEND_FLAG_CALCULATE_DATA_SIZE0x8
+
+/*
+ * Use fallocate command to pre-allocate file extents and punch file holes,
+ * instead of write commands with data buffers filled with 0 value bytes.
+ *
+ * Added in send stream version 2.
+ */
+#define BTRFS_SEND_FLAG_SUPPORT_FALLOCATE   0x10
+
 #define BTRFS_SEND_FLAG_MASK \
(BTRFS_SEND_FLAG_NO_FILE_DATA | \
 BTRFS_SEND_FLAG_OMIT_STREAM_HEADER | \
-BTRFS_SEND_FLAG_OMIT_END_CMD)
+BTRFS_SEND_FLAG_OMIT_END_CMD | \
+BTRFS_SEND_FLAG_CALCULATE_DATA_SIZE |  \
+BTRFS_SEND_FLAG_SUPPORT_FALLOCATE)
 
 struct btrfs_ioctl_send_args {
__s64 send_fd;  /* in */
-- 
1.9.1

--
To unsubscribe from this list: send the line unsubscribe linux-btrfs in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 3/4 v2] Btrfs: send, use fallocate command to punch holes

2014-04-16 Thread Filipe David Borba Manana
Instead of sending a write command with a data buffer filled with 0 value bytes,
use the fallocate command, introduced in the send stream version 2, to tell the
receiver to punch a file hole using the fallocate system call.

Signed-off-by: Filipe David Borba Manana fdman...@gmail.com
---

V2: A v2 stream is now only produced if the send ioctl caller passes in one of
the new flags (BTRFS_SEND_FLAG_CALCULATE_DATA_SIZE | 
BTRFS_SEND_FLAG_SUPPORT_FALLOCATE)
to avoid breaking old clients.

 fs/btrfs/send.c | 56 +++-
 fs/btrfs/send.h |  4 
 2 files changed, 55 insertions(+), 5 deletions(-)

diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c
index f5db492..2c6d58c 100644
--- a/fs/btrfs/send.c
+++ b/fs/btrfs/send.c
@@ -564,6 +564,7 @@ static int tlv_put(struct send_ctx *sctx, u16 attr, const 
void *data, int len)
return tlv_put(sctx, attr, __tmp, sizeof(__tmp));  \
}
 
+TLV_PUT_DEFINE_INT(32)
 TLV_PUT_DEFINE_INT(64)
 
 static int tlv_put_string(struct send_ctx *sctx, u16 attr,
@@ -4483,15 +4484,16 @@ out:
return ret;
 }
 
-static int send_hole(struct send_ctx *sctx, u64 end)
+static int send_fallocate(struct send_ctx *sctx, u32 flags,
+ u64 offset, u64 len)
 {
struct fs_path *p = NULL;
-   u64 offset = sctx-cur_inode_last_extent;
-   u64 len;
int ret = 0;
 
+   ASSERT(sctx-flags  BTRFS_SEND_FLAG_SUPPORT_FALLOCATE);
+
if (sctx-phase == SEND_PHASE_COMPUTE_DATA_SIZE) {
-   sctx-total_data_size += end - offset;
+   sctx-total_data_size += len;
return 0;
}
 
@@ -4500,6 +4502,43 @@ static int send_hole(struct send_ctx *sctx, u64 end)
return -ENOMEM;
ret = get_cur_path(sctx, sctx-cur_ino, sctx-cur_inode_gen, p);
if (ret  0)
+   goto out;
+
+   ret = begin_cmd(sctx, BTRFS_SEND_C_FALLOCATE);
+   if (ret  0)
+   goto out;
+   TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p);
+   TLV_PUT_U32(sctx, BTRFS_SEND_A_FALLOCATE_FLAGS, flags);
+   TLV_PUT_U64(sctx, BTRFS_SEND_A_FILE_OFFSET, offset);
+   TLV_PUT_U64(sctx, BTRFS_SEND_A_SIZE, len);
+   ret = send_cmd(sctx);
+
+tlv_put_failure:
+out:
+   fs_path_free(p);
+   return ret;
+}
+
+static int send_hole(struct send_ctx *sctx, u64 end)
+{
+   struct fs_path *p = NULL;
+   u64 offset = sctx-cur_inode_last_extent;
+   u64 len = end - offset;
+   int ret = 0;
+
+   if (sctx-phase == SEND_PHASE_COMPUTE_DATA_SIZE) {
+   sctx-total_data_size += len;
+   return 0;
+   }
+
+   if (sctx-flags  BTRFS_SEND_FLAG_SUPPORT_FALLOCATE)
+   return send_fallocate(sctx,
+ BTRFS_SEND_PUNCH_HOLE_FALLOC_FLAGS,
+ offset,
+ len);
+
+   ret = get_cur_path(sctx, sctx-cur_ino, sctx-cur_inode_gen, p);
+   if (ret  0)
goto tlv_put_failure;
memset(sctx-read_buf, 0, BTRFS_SEND_READ_SIZE);
while (offset  end) {
@@ -4551,7 +4590,8 @@ static int send_write_or_clone(struct send_ctx *sctx,
len = btrfs_file_extent_num_bytes(path-nodes[0], ei);
}
 
-   if (offset + len  sctx-cur_inode_size)
+   if (offset  sctx-cur_inode_size 
+   offset + len  sctx-cur_inode_size)
len = sctx-cur_inode_size - offset;
if (len == 0) {
ret = 0;
@@ -4568,6 +4608,12 @@ static int send_write_or_clone(struct send_ctx *sctx,
ret = send_clone(sctx, offset, len, clone_root);
} else if (sctx-flags  BTRFS_SEND_FLAG_NO_FILE_DATA) {
ret = send_update_extent(sctx, offset, len);
+   } else if (btrfs_file_extent_disk_bytenr(path-nodes[0], ei) == 0 
+  type != BTRFS_FILE_EXTENT_INLINE 
+  (sctx-flags  BTRFS_SEND_FLAG_SUPPORT_FALLOCATE) 
+  offset  sctx-cur_inode_size) {
+   ret = send_fallocate(sctx, BTRFS_SEND_PUNCH_HOLE_FALLOC_FLAGS,
+offset, len);
} else {
while (pos  len) {
l = len - pos;
diff --git a/fs/btrfs/send.h b/fs/btrfs/send.h
index 367030d..a632c0d 100644
--- a/fs/btrfs/send.h
+++ b/fs/btrfs/send.h
@@ -141,6 +141,10 @@ enum {
 #define BTRFS_SEND_A_FALLOCATE_FLAG_KEEP_SIZE   (1  0)
 #define BTRFS_SEND_A_FALLOCATE_FLAG_PUNCH_HOLE  (1  1)
 
+#define BTRFS_SEND_PUNCH_HOLE_FALLOC_FLAGS\
+   (BTRFS_SEND_A_FALLOCATE_FLAG_KEEP_SIZE |  \
+BTRFS_SEND_A_FALLOCATE_FLAG_PUNCH_HOLE)
+
 #ifdef __KERNEL__
 long btrfs_ioctl_send(struct file *mnt_file, void __user *arg);
 #endif
-- 
1.9.1

--
To unsubscribe from this list: send the line unsubscribe linux-btrfs in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 4/4 v2] Btrfs: send, use fallocate command to allocate extents

2014-04-16 Thread Filipe David Borba Manana
The send stream version 2 adds the fallocate command, which can be used to
allocate extents for a file or punch holes in a file. Previously we were
ignoring file prealloc extents or treating them as extents filled with 0
bytes and sending a regular write command to the stream.

After this change, together with my previous change titled:

Btrfs: send, use fallocate command to punch holes

an incremental send preserves the hole and data structure of files, which can
be seen via calls to lseek with the whence parameter set to SEEK_DATA or
SEEK_HOLE, as the example below shows:

mkfs.btrfs -f /dev/sdc
mount /dev/sdc /mnt
xfs_io -f -c pwrite -S 0x01 -b 30 0 30 /mnt/foo
btrfs subvolume snapshot -r /mnt /mnt/mysnap1

xfs_io -c fpunch 10 5 /mnt/foo
xfs_io -c falloc 10 5 /mnt/foo
xfs_io -c pwrite -S 0xff -b 1000 12 1000 /mnt/foo
xfs_io -c fpunch 25 2 /mnt/foo

# prealloc extents that start beyond the inode's size
xfs_io -c falloc -k 30 100 /mnt/foo
xfs_io -c falloc -k 900 200 /mnt/foo

btrfs subvolume snapshot -r /mnt /mnt/mysnap2

btrfs send /mnt/mysnap1 -f /tmp/1.snap
btrfs send -p /mnt/mysnap1 /mnt/mysnap2 -f /tmp/2.snap

mkfs.btrfs -f /dev/sdd
mount /dev/sdd /mnt2
btrfs receive /mnt2 -f /tmp/1.snap
btrfs receive /mnt2 -f /tmp/2.snap

Before this change the hole/data structure differed between both filesystems:

$ xfs_io -r -c 'seek -r -a 0' /mnt/mysnap2/foo
Whence  Result
DATA0
HOLE102400
DATA118784
HOLE122880
DATA147456
HOLE253952
DATA266240
HOLE30

$ xfs_io -r -c 'seek -r -a 0' /mnt2/mysnap2/foo
Whence  Result
DATA0
HOLE30

After this change the second filesystem (/dev/sdd) ends up with the same
hole/data structure as the first filesystem.

Also, after this change, prealloc extents that lie beyond the inode's size
(i.e. those allocated with fallocate and the keep size flag) are also
replicated by an incremental send. For the above test, it can be observed via
fiemap (or btrfs-debug-tree):

$ xfs_io -r -c 'fiemap -l' /mnt2/mysnap2/foo
0: [0..191]: 25096..25287 192 blocks
1: [192..199]: 24672..24679 8 blocks
2: [200..231]: 24584..24615 32 blocks
3: [232..239]: 24680..24687 8 blocks
4: [240..287]: 24616..24663 48 blocks
5: [288..295]: 24688..24695 8 blocks
6: [296..487]: 25392..25583 192 blocks
7: [488..495]: 24696..24703 8 blocks
8: [496..519]: hole 24 blocks
9: [520..527]: 24704..24711 8 blocks
10: [528..583]: 25624..25679 56 blocks
11: [584..591]: 24712..24719 8 blocks
12: [592..2543]: 26192..28143 1952 blocks
13: [2544..17575]: hole 15032 blocks
14: [17576..21487]: 28144..32055 3912 blocks

A test case for xfstests will follow.

Signed-off-by: Filipe David Borba Manana fdman...@gmail.com
---

V2: A v2 stream is now only produced if the send ioctl caller passes in one of
the new flags (BTRFS_SEND_FLAG_CALCULATE_DATA_SIZE | 
BTRFS_SEND_FLAG_SUPPORT_FALLOCATE)
to avoid breaking old clients.

 fs/btrfs/send.c | 70 +++--
 1 file changed, 48 insertions(+), 22 deletions(-)

diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c
index 2c6d58c..043fd43 100644
--- a/fs/btrfs/send.c
+++ b/fs/btrfs/send.c
@@ -113,9 +113,10 @@ struct send_ctx {
 */
u64 cur_ino;
u64 cur_inode_gen;
-   int cur_inode_new;
-   int cur_inode_new_gen;
-   int cur_inode_deleted;
+   u8 cur_inode_new:1;
+   u8 cur_inode_new_gen:1;
+   u8 cur_inode_skip_truncate:1;
+   u8 cur_inode_deleted:1;
u64 cur_inode_size;
u64 cur_inode_mode;
u64 cur_inode_rdev;
@@ -4599,8 +4600,7 @@ static int send_write_or_clone(struct send_ctx *sctx,
}
 
if (sctx-phase == SEND_PHASE_COMPUTE_DATA_SIZE) {
-   if (offset  sctx-cur_inode_size)
-   sctx-total_data_size += len;
+   sctx-total_data_size += len;
goto out;
}
 
@@ -4614,6 +4614,27 @@ static int send_write_or_clone(struct send_ctx *sctx,
   offset  sctx-cur_inode_size) {
ret = send_fallocate(sctx, BTRFS_SEND_PUNCH_HOLE_FALLOC_FLAGS,
 offset, len);
+   } else if (type == BTRFS_FILE_EXTENT_PREALLOC 
+  (sctx-flags  BTRFS_SEND_FLAG_SUPPORT_FALLOCATE)) {
+   u32 mode = 0;
+   if (offset  sctx-cur_inode_size) {
+   ret = send_fallocate(sctx,
+BTRFS_SEND_PUNCH_HOLE_FALLOC_FLAGS,
+offset, len);
+   if (ret)
+   goto out;
+   } else {
+   if (!sctx-cur_inode_skip_truncate

[PATCH 1/4 v2] Btrfs-progs: send, bump stream version

2014-04-16 Thread Filipe David Borba Manana
This increases the send stream version from version 1 to version 2, adding
2 new commands:

1) total data size - used to tell the receiver how much file data the stream
   will add or update;

2) fallocate - used to pre-allocate space for files and to punch holes in files.

This is preparation work for subsequent changes that implement the new features
(computing total data size and use fallocate for better performance).

This doesn't break compatibility with older kernels or clients. In order to get
a version 2 send stream, new flags must be passed to the send ioctl.

Signed-off-by: Filipe David Borba Manana fdman...@gmail.com
---

V2: Added new send ioctl flag BTRFS_SEND_FLAG_SUPPORT_FALLOCATE. A version 2
stream is now only produced if the ioctl caller specifies at least one of
the new send flags (BTRFS_SEND_FLAG_SUPPORT_FALLOCATE or
BTRFS_SEND_FLAG_CALCULATE_DATA_SIZE).

 ioctl.h | 18 ++
 send.h  | 13 -
 2 files changed, 30 insertions(+), 1 deletion(-)

diff --git a/ioctl.h b/ioctl.h
index 231660a..e2c506b 100644
--- a/ioctl.h
+++ b/ioctl.h
@@ -392,6 +392,24 @@ struct btrfs_ioctl_received_subvol_args {
  */
 #define BTRFS_SEND_FLAG_OMIT_END_CMD   0x4
 
+/*
+ * The sum of all length fields the receiver will get in write, clone and
+ * fallocate commands.
+ * This can be used by the receiver to compute progress, at the expense of some
+ * initial metadata scan performed by the sender (kernel).
+ *
+ * Added in send stream version 2.
+ */
+#define BTRFS_SEND_FLAG_CALCULATE_DATA_SIZE0x8
+
+/*
+ * Use fallocate command to pre-allocate file extents and punch file holes,
+ * instead of write commands with data buffers filled with 0 value bytes.
+ *
+ * Added in send stream version 2.
+ */
+#define BTRFS_SEND_FLAG_SUPPORT_FALLOCATE   0x10
+
 struct btrfs_ioctl_send_args {
__s64 send_fd;  /* in */
__u64 clone_sources_count;  /* in */
diff --git a/send.h b/send.h
index e8da785..69e81fb 100644
--- a/send.h
+++ b/send.h
@@ -24,7 +24,7 @@ extern C {
 #endif
 
 #define BTRFS_SEND_STREAM_MAGIC btrfs-stream
-#define BTRFS_SEND_STREAM_VERSION 1
+#define BTRFS_SEND_STREAM_VERSION 2
 
 #define BTRFS_SEND_BUF_SIZE (1024 * 64)
 #define BTRFS_SEND_READ_SIZE (1024 * 48)
@@ -91,6 +91,11 @@ enum btrfs_send_cmd {
 
BTRFS_SEND_C_END,
BTRFS_SEND_C_UPDATE_EXTENT,
+
+   /* added in stream version 2 */
+   BTRFS_SEND_C_TOTAL_DATA_SIZE,
+   BTRFS_SEND_C_FALLOCATE,
+
__BTRFS_SEND_C_MAX,
 };
 #define BTRFS_SEND_C_MAX (__BTRFS_SEND_C_MAX - 1)
@@ -129,10 +134,16 @@ enum {
BTRFS_SEND_A_CLONE_OFFSET,
BTRFS_SEND_A_CLONE_LEN,
 
+   /* added in stream version 2 */
+   BTRFS_SEND_A_FALLOCATE_FLAGS,
+
__BTRFS_SEND_A_MAX,
 };
 #define BTRFS_SEND_A_MAX (__BTRFS_SEND_A_MAX - 1)
 
+#define BTRFS_SEND_A_FALLOCATE_FLAG_KEEP_SIZE   (1  0)
+#define BTRFS_SEND_A_FALLOCATE_FLAG_PUNCH_HOLE  (1  1)
+
 #ifdef __KERNEL__
 long btrfs_ioctl_send(struct file *mnt_file, void __user *arg);
 #endif
-- 
1.9.1

--
To unsubscribe from this list: send the line unsubscribe linux-btrfs in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 2/4 v2] Btrfs-progs: send, implement total data size callback and progress report

2014-04-16 Thread Filipe David Borba Manana
This is a followup to the kernel patch titled:

Btrfs: send, implement total data size command to allow for progress estimation

This makes the btrfs send and receive commands aware of the new send stream
command, named BTRFS_SEND_C_TOTAL_DATA_SIZE, which tells us the amount of file data
that is new between the parent and send snapshots/roots. As this command
immediately follows the commands to start a snapshot/subvolume, it can be
used to report and compute progress, by keeping a counter that is incremented
with the data length of each write, clone and fallocate command that is received
from the stream.

Example:

$ btrfs send -o /mnt/sdd/snap_base | btrfs receive /mnt/sdc
At subvol /mnt/sdd/snap_base
At subvol snap_base
About to receive 9212392667 bytes
Subvolume /mnt/sdc//snap_base, 4059722426 / 9212392667 bytes received, 
44.07%, 40.32MB/s

$ btrfs send -o -p /mnt/sdd/snap_base /mnt/sdd/snap_incr | btrfs receive 
/mnt/sdc
At subvol /mnt/sdd/snap_incr
At subvol snap_incr
About to receive 9571342213 bytes
Subvolume /mnt/sdc//snap_incr, 6557345221 / 9571342213 bytes received, 
68.51%, 51.04MB/s

At the moment progress is only reported by btrfs-receive, but it is possible
and simple to do it for btrfs-send too, so that we can get a progress report
when not piping btrfs-send output to btrfs-receive (i.e. when sending directly
to a file).

Signed-off-by: Filipe David Borba Manana fdman...@gmail.com
---

V2: Added new send ioctl flag BTRFS_SEND_FLAG_SUPPORT_FALLOCATE. A version 2
stream is now only produced if the ioctl caller specifies at least one of
the new send flags (BTRFS_SEND_FLAG_SUPPORT_FALLOCATE or
BTRFS_SEND_FLAG_CALCULATE_DATA_SIZE).

 Documentation/btrfs-send.txt |  3 ++
 cmds-receive.c   | 91 
 cmds-send.c  | 14 ++-
 send-stream.c|  4 ++
 send-stream.h|  1 +
 5 files changed, 111 insertions(+), 2 deletions(-)

diff --git a/Documentation/btrfs-send.txt b/Documentation/btrfs-send.txt
index 18a98fa..38470b0 100644
--- a/Documentation/btrfs-send.txt
+++ b/Documentation/btrfs-send.txt
@@ -40,6 +40,9 @@ Use this snapshot as a clone source for an incremental send 
(multiple allowed).
 -f outfile::
 Output is normally written to stdout. To write to a file, use this option.
 An alternative would be to use pipes.
+-o::
+Obtain the total data size for each subvolume or snapshot to send. This 
demands additional
+processing (mostly IO bound) but is useful for the receive command to report 
progress.
 
 EXIT STATUS
 ---
diff --git a/cmds-receive.c b/cmds-receive.c
index d6cd3da..19300fc 100644
--- a/cmds-receive.c
+++ b/cmds-receive.c
@@ -32,6 +32,7 @@
 #include ftw.h
 #include wait.h
 #include assert.h
+#include time.h
 
 #include sys/stat.h
 #include sys/types.h
@@ -71,6 +72,14 @@ struct btrfs_receive
struct subvol_uuid_search sus;
 
int honor_end_cmd;
+
+   /* For the subvolume/snapshot we're currently receiving. */
+   u64 total_data_size;
+   u64 bytes_received;
+   time_t last_progress_update;
+   u64 bytes_received_last_update;
+   float progress;
+   const char *target;
 };
 
 static int finish_subvol(struct btrfs_receive *r)
@@ -156,6 +165,12 @@ static int process_subvol(const char *path, const u8 
*uuid, u64 ctransid,
goto out;
 
r-cur_subvol = calloc(1, sizeof(*r-cur_subvol));
+   r-total_data_size = 0;
+   r-bytes_received = 0;
+   r-progress = 0.0;
+   r-last_progress_update = 0;
+   r-bytes_received_last_update = 0;
+   r-target = Subvolume;
 
if (strlen(r-dest_dir_path) == 0)
r-cur_subvol-path = strdup(path);
@@ -205,6 +220,12 @@ static int process_snapshot(const char *path, const u8 
*uuid, u64 ctransid,
goto out;
 
r-cur_subvol = calloc(1, sizeof(*r-cur_subvol));
+   r-total_data_size = 0;
+   r-bytes_received = 0;
+   r-progress = 0.0;
+   r-last_progress_update = 0;
+   r-bytes_received_last_update = 0;
+   r-target = Snapshot;
 
if (strlen(r-dest_dir_path) == 0)
r-cur_subvol-path = strdup(path);
@@ -287,6 +308,73 @@ out:
return ret;
 }
 
+static int process_total_data_size(u64 size, void *user)
+{
+   struct btrfs_receive *r = user;
+
+   r-total_data_size = size;
+   fprintf(stdout, About to receive %llu bytes\n, size);
+
+   return 0;
+}
+
+static void update_progress(struct btrfs_receive *r, u64 bytes)
+{
+   float new_progress;
+   time_t now;
+   time_t tdiff;
+
+   if (r-total_data_size == 0)
+   return;
+
+   r-bytes_received += bytes;
+
+   now = time(NULL);
+   tdiff = now - r-last_progress_update;
+   if (tdiff  1) {
+   if (r-bytes_received == r-total_data_size)
+   fprintf(stdout, \n);
+   return;
+   }
+
+   new_progress = ((float

[PATCH 3/4 v2] Btrfs-progs: send, implement fallocate command callback

2014-04-16 Thread Filipe David Borba Manana
The fallocate send stream command, added in stream version 2, is used to
pre-allocate space for files and punch file holes. This change implements
the callback for that new command, using the fallocate function from the
standard C library to carry out the specified action (allocate file space
or punch a file hole).

Signed-off-by: Filipe David Borba Manana fdman...@gmail.com
---

V2: Use the new send ioctl flag BTRFS_SEND_FLAG_SUPPORT_FALLOCATE if the user
asks for it (-a command line option), which will make the kernel generate
a version 2 send stream, so that old clients aren't affected.

 Documentation/btrfs-send.txt |  3 +++
 cmds-receive.c   | 38 ++
 cmds-send.c  | 12 ++--
 send-stream.c| 13 +
 send-stream.h|  2 ++
 5 files changed, 66 insertions(+), 2 deletions(-)

diff --git a/Documentation/btrfs-send.txt b/Documentation/btrfs-send.txt
index 38470b0..e96be07 100644
--- a/Documentation/btrfs-send.txt
+++ b/Documentation/btrfs-send.txt
@@ -43,6 +43,9 @@ An alternative would be to use pipes.
 -o::
 Obtain the total data size for each subvolume or snapshot to send. This 
demands additional
 processing (mostly IO bound) but is useful for the receive command to report 
progress.
+-a::
+Use fallocate to pre-allocate file extents and to punch file holes, instead of 
writing zeroes
+to files.
 
 EXIT STATUS
 ---
diff --git a/cmds-receive.c b/cmds-receive.c
index 19300fc..3f30066 100644
--- a/cmds-receive.c
+++ b/cmds-receive.c
@@ -41,6 +41,7 @@
 #include sys/types.h
 #include sys/xattr.h
 #include uuid/uuid.h
+#include linux/falloc.h
 
 #include ctree.h
 #include ioctl.h
@@ -887,6 +888,42 @@ out:
return ret;
 }
 
+static int process_fallocate(const char *path, u32 flags, u64 offset,
+u64 len, void *user)
+{
+   struct btrfs_receive *r = user;
+   char *full_path = path_cat(r-full_subvol_path, path);
+   int mode = 0;
+   int ret;
+
+   if (flags  BTRFS_SEND_A_FALLOCATE_FLAG_KEEP_SIZE)
+   mode |= FALLOC_FL_KEEP_SIZE;
+   if (flags  BTRFS_SEND_A_FALLOCATE_FLAG_PUNCH_HOLE)
+   mode |= FALLOC_FL_PUNCH_HOLE;
+
+   if (g_verbose = 2)
+   fprintf(stderr,
+   fallocate %s - flags %u, offset %llu, len %llu\n,
+   path, flags, offset, len);
+
+   ret = open_inode_for_write(r, full_path);
+   if (ret  0)
+   goto out;
+
+   ret = fallocate(r-write_fd, mode, offset, len);
+   if (ret) {
+   ret = -errno;
+   fprintf(stderr,
+   ERROR: fallocate against %s failed. %s\n,
+   path, strerror(-ret));
+   goto out;
+   }
+   update_progress(r, len);
+
+out:
+   free(full_path);
+   return ret;
+}
 
 static struct btrfs_send_ops send_ops = {
.subvol = process_subvol,
@@ -910,6 +947,7 @@ static struct btrfs_send_ops send_ops = {
.chown = process_chown,
.utimes = process_utimes,
.total_data_size = process_total_data_size,
+   .fallocate = process_fallocate,
 };
 
 static int do_receive(struct btrfs_receive *r, const char *tomnt, int r_fd)
diff --git a/cmds-send.c b/cmds-send.c
index 69f5ba1..2a62e68 100644
--- a/cmds-send.c
+++ b/cmds-send.c
@@ -46,6 +46,7 @@
 
 static int g_verbose = 0;
 static int g_total_data_size = 0;
+static int g_fallocate = 0;
 
 struct btrfs_send {
int send_fd;
@@ -284,6 +285,8 @@ static int do_send(struct btrfs_send *send, u64 
parent_root_id,
io_send.flags |= BTRFS_SEND_FLAG_OMIT_END_CMD;
if (g_total_data_size)
io_send.flags |= BTRFS_SEND_FLAG_CALCULATE_DATA_SIZE;
+   if (g_fallocate)
+   io_send.flags |= BTRFS_SEND_FLAG_SUPPORT_FALLOCATE;
ret = ioctl(subvol_fd, BTRFS_IOC_SEND, io_send);
if (ret) {
ret = -errno;
@@ -427,7 +430,7 @@ int cmd_send(int argc, char **argv)
memset(send, 0, sizeof(send));
send.dump_fd = fileno(stdout);
 
-   while ((c = getopt(argc, argv, veoc:f:i:p:)) != -1) {
+   while ((c = getopt(argc, argv, veoac:f:i:p:)) != -1) {
switch (c) {
case 'v':
g_verbose++;
@@ -517,6 +520,9 @@ int cmd_send(int argc, char **argv)
case 'o':
g_total_data_size = 1;
break;
+   case 'a':
+   g_fallocate = 1;
+   break;
case '?':
default:
fprintf(stderr, ERROR: send args invalid.\n);
@@ -679,7 +685,7 @@ out:
 }
 
 const char * const cmd_send_usage[] = {
-   btrfs send [-veo] [-p parent] [-c clone-src] [-f outfile] 
subvol [subvol...],
+   btrfs send [-veoa] [-p parent] [-c clone-src] [-f outfile] 
subvol [subvol...],
Send

[PATCH 2/4 v2] Btrfs: send, implement total data size command to allow for progress estimation

2014-04-16 Thread Filipe David Borba Manana
This new send flag makes send first calculate the amount of new file data (in
bytes) the send root has relative to the parent root, or, in the case of a
non-incremental send, the total amount of file data the stream will create
(including holes and prealloc extents). In other words, it computes the sum of
the lengths of all write, clone and fallocate operations that will be sent
through the send stream.

This data size value is sent in a new command, named
BTRFS_SEND_C_TOTAL_DATA_SIZE, that immediately follows a BTRFS_SEND_C_SUBVOL or
BTRFS_SEND_C_SNAPSHOT command, and precedes any command that changes a file or
the filesystem hierarchy. Upon receiving a write, clone or fallocate command,
the receiving end can increment a counter by the data length of that command
and therefore report progress by comparing the counter's value with the data
size value received in the BTRFS_SEND_C_TOTAL_DATA_SIZE command.

The approach is simple: before the normal operation of send, do a scan in the
file system tree for new inodes and new/changed file extent items, just like in
send's normal operation, and keep incrementing a counter with new inodes' size
and the size of file extents (and file holes) that are going to be written,
cloned or fallocated. This is actually a simpler and more lightweight tree
scan/processing than the one we do when sending the changes, as it doesn't
process inode references nor does any lookups in the extent tree, for example.

After modifying btrfs-progs to understand this new command and report progress,
here's an example (the -o flag tells btrfs send to pass the new flag to the
kernel's send ioctl):

$ btrfs send -o /mnt/sdd/snap_base | btrfs receive /mnt/sdc
At subvol /mnt/sdd/snap_base
At subvol snap_base
About to receive 9212392667 bytes
Subvolume /mnt/sdc//snap_base, 4059722426 / 9212392667 bytes received, 
44.07%, 40.32MB/s

$ btrfs send -o -p /mnt/sdd/snap_base /mnt/sdd/snap_incr | btrfs receive 
/mnt/sdc
At subvol /mnt/sdd/snap_incr
At subvol snap_incr
About to receive 9571342213 bytes
Subvolume /mnt/sdc//snap_incr, 6557345221 / 9571342213 bytes received, 
68.51%, 51.04MB/s

Signed-off-by: Filipe David Borba Manana fdman...@gmail.com
---

V2: A v2 stream is now only produced if the send ioctl caller passes in one of
the new flags (BTRFS_SEND_FLAG_CALCULATE_DATA_SIZE | 
BTRFS_SEND_FLAG_SUPPORT_FALLOCATE)
to avoid breaking old clients.

 fs/btrfs/send.c | 194 ++--
 1 file changed, 162 insertions(+), 32 deletions(-)

diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c
index 53712aa..f5db492 100644
--- a/fs/btrfs/send.c
+++ b/fs/btrfs/send.c
@@ -81,7 +81,13 @@ struct clone_root {
 #define SEND_CTX_MAX_NAME_CACHE_SIZE 128
 #define SEND_CTX_NAME_CACHE_CLEAN_SIZE (SEND_CTX_MAX_NAME_CACHE_SIZE * 2)
 
+enum btrfs_send_phase {
+   SEND_PHASE_STREAM_CHANGES,
+   SEND_PHASE_COMPUTE_DATA_SIZE,
+};
+
 struct send_ctx {
+   enum btrfs_send_phase phase;
struct file *send_filp;
loff_t send_off;
char *send_buf;
@@ -116,6 +122,7 @@ struct send_ctx {
u64 cur_inode_last_extent;
 
u64 send_progress;
+   u64 total_data_size;
 
struct list_head new_refs;
struct list_head deleted_refs;
@@ -692,6 +699,8 @@ static int send_rename(struct send_ctx *sctx,
 {
int ret;
 
+   ASSERT(sctx-phase != SEND_PHASE_COMPUTE_DATA_SIZE);
+
 verbose_printk(btrfs: send_rename %s - %s\n, from-start, to-start);
 
ret = begin_cmd(sctx, BTRFS_SEND_C_RENAME);
@@ -716,6 +725,8 @@ static int send_link(struct send_ctx *sctx,
 {
int ret;
 
+   ASSERT(sctx-phase != SEND_PHASE_COMPUTE_DATA_SIZE);
+
 verbose_printk(btrfs: send_link %s - %s\n, path-start, lnk-start);
 
ret = begin_cmd(sctx, BTRFS_SEND_C_LINK);
@@ -739,6 +750,8 @@ static int send_unlink(struct send_ctx *sctx, struct 
fs_path *path)
 {
int ret;
 
+   ASSERT(sctx-phase != SEND_PHASE_COMPUTE_DATA_SIZE);
+
 verbose_printk(btrfs: send_unlink %s\n, path-start);
 
ret = begin_cmd(sctx, BTRFS_SEND_C_UNLINK);
@@ -761,6 +774,8 @@ static int send_rmdir(struct send_ctx *sctx, struct fs_path 
*path)
 {
int ret;
 
+   ASSERT(sctx-phase != SEND_PHASE_COMPUTE_DATA_SIZE);
+
 verbose_printk(btrfs: send_rmdir %s\n, path-start);
 
ret = begin_cmd(sctx, BTRFS_SEND_C_RMDIR);
@@ -2308,6 +2323,9 @@ static int send_truncate(struct send_ctx *sctx, u64 ino, 
u64 gen, u64 size)
int ret = 0;
struct fs_path *p;
 
+   if (sctx-phase == SEND_PHASE_COMPUTE_DATA_SIZE)
+   return 0;
+
 verbose_printk(btrfs: send_truncate %llu size=%llu\n, ino, size);
 
p = fs_path_alloc();
@@ -2337,6 +2355,8 @@ static int send_chmod(struct send_ctx *sctx, u64 ino, u64 
gen, u64 mode)
int ret = 0;
struct fs_path *p;
 
+   ASSERT(sctx-phase != SEND_PHASE_COMPUTE_DATA_SIZE);
+
 verbose_printk(btrfs: send_chmod

[PATCH 4/4 v2] Btrfs-progs: add write and clone commands debug info to receive

2014-04-16 Thread Filipe David Borba Manana
When specifying -vv, print information about received write and clone commands
too, as we do this for other commands already and it's very useful for
debugging and troubleshooting.

Signed-off-by: Filipe David Borba Manana fdman...@gmail.com
---

V2: Added new send ioctl flag BTRFS_SEND_FLAG_SUPPORT_FALLOCATE. A version 2
stream is now only produced if the ioctl caller specifies at least one of
the new send flags (BTRFS_SEND_FLAG_SUPPORT_FALLOCATE or
BTRFS_SEND_FLAG_CALCULATE_DATA_SIZE).

 cmds-receive.c | 9 +
 1 file changed, 9 insertions(+)

diff --git a/cmds-receive.c b/cmds-receive.c
index 3f30066..7a23823 100644
--- a/cmds-receive.c
+++ b/cmds-receive.c
@@ -636,6 +636,10 @@ static int process_write(const char *path, const void 
*data, u64 offset,
u64 pos = 0;
int w;
 
+   if (g_verbose = 2)
+   fprintf(stderr, write %s, offset %llu, len %llu\n,
+   path, offset, len);
+
ret = open_inode_for_write(r, full_path);
if (ret  0)
goto out;
@@ -672,6 +676,11 @@ static int process_clone(const char *path, u64 offset, u64 
len,
char *full_clone_path = NULL;
int clone_fd = -1;
 
+   if (g_verbose = 2)
+   fprintf(stderr,
+   clone %s, offset %llu, len %llu, clone path %s, clone 
offset %llu\n,
+   path, offset, len, clone_path, clone_offset);
+
ret = open_inode_for_write(r, full_path);
if (ret  0)
goto out;
-- 
1.9.1

--
To unsubscribe from this list: send the line unsubscribe linux-btrfs in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v2] xfstests: btrfs, test send's ability to punch holes and prealloc extents

2014-04-16 Thread Filipe David Borba Manana
This test verifies that after an incremental btrfs send the
replicated file has the same exact hole and data structure as in
the origin filesystem. This didn't use to be the case before the
send stream version 2 - holes were sent as write operations of 0
valued bytes instead of punching holes with the fallocate system
call, and pre-allocated extents were also sent as write
operations of 0 valued bytes instead of instructions for the
receiver to use the fallocate system call.

It also checks that prealloc extents that lie beyond the file's
size are replicated by an incremental send.

Signed-off-by: Filipe David Borba Manana fdman...@gmail.com
---

V2: Addressed Dave's comments, and updated btrfs send invocation, by specifying
the new command line option (-a) that enables use of fallocate - added
function _require_btrfs_send_fallocate_flag() to skip the test when an old
version of btrfs-progs is found.

 common/rc   |   9 
 tests/btrfs/047 | 121 
 tests/btrfs/047.out |  27 
 tests/btrfs/group   |   1 +
 4 files changed, 158 insertions(+)
 create mode 100755 tests/btrfs/047
 create mode 100644 tests/btrfs/047.out

diff --git a/common/rc b/common/rc
index acf419b..e94e51c 100644
--- a/common/rc
+++ b/common/rc
@@ -2262,6 +2262,15 @@ _run_btrfs_util_prog()
run_check $BTRFS_UTIL_PROG $*
 }
 
+_require_btrfs_send_fallocate_flag()
+{
+   $BTRFS_UTIL_PROG send 21 | \
+   grep '^[ \t]*\-a[ \t]\+.* fallocate '  /dev/null 21
+   if [ $? -ne 0 ]; then
+   _notrun Missing btrfs-progs send -a command line option, 
skipped this test
+   fi
+}
+
 init_rc()
 {
if [ $iam == new ]
diff --git a/tests/btrfs/047 b/tests/btrfs/047
new file mode 100755
index 000..c8171a5
--- /dev/null
+++ b/tests/btrfs/047
@@ -0,0 +1,121 @@
+#! /bin/bash
+# FS QA Test No. btrfs/047
+#
+# Verify that after an incremental btrfs send the replicated file has
+# the same exact hole and data structure as in the origin filesystem.
+# This didn't use to be the case before the send stream version 2 -
+# holes were sent as write operations of 0 valued bytes instead of punching
+# holes with the fallocate system call, and pre-allocated extents were sent
+# as well as write operations of 0 valued bytes instead of instructions for
+# the receiver to use the fallocate system call. Also check that prealloc
+# extents that lie beyond the file's size are replicated by an incremental
+# send.
+#
+# More specifically, this structure preserving guarantee was added by the
+# following linux kernel commits:
+#
+#Btrfs: send, use fallocate command to punch holes
+#Btrfs: send, use fallocate command to allocate extents
+#
+#---
+# Copyright (c) 2014 Filipe Manana.  All Rights Reserved.
+#
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License as
+# published by the Free Software Foundation.
+#
+# This program is distributed in the hope that it would be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write the Free Software Foundation,
+# Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+#---
+#
+
+seq=`basename $0`
+seqres=$RESULT_DIR/$seq
+echo QA output created by $seq
+
+tmp=/tmp/$$
+status=1   # failure is the default!
+trap _cleanup; exit \$status 0 1 2 3 15
+
+_cleanup()
+{
+rm -fr $send_files_dir
+rm -fr $tmp
+}
+
+# get standard environment, filters and checks
+. ./common/rc
+. ./common/filter
+. ./common/punch
+
+# real QA test starts here
+_supported_fs btrfs
+_supported_os Linux
+_require_scratch
+_require_fssum
+_require_xfs_io_fiemap
+_require_btrfs_send_fallocate_flag
+_need_to_be_root
+
+send_files_dir=$TEST_DIR/btrfs-test-$seq
+
+rm -f $seqres.full
+rm -fr $send_files_dir
+mkdir $send_files_dir
+
+_scratch_mkfs /dev/null 21
+_scratch_mount
+
+$XFS_IO_PROG -f -c pwrite -S 0x01 -b 30 0 30 $SCRATCH_MNT/foo \
+   | _filter_xfs_io
+
+_run_btrfs_util_prog subvolume snapshot -r $SCRATCH_MNT $SCRATCH_MNT/mysnap1
+
+$XFS_IO_PROG -c fpunch 10 5 $SCRATCH_MNT/foo
+$XFS_IO_PROG -c falloc 10 5 $SCRATCH_MNT/foo
+$XFS_IO_PROG -c pwrite -S 0xff -b 1000 12 1000 $SCRATCH_MNT/foo \
+   | _filter_xfs_io
+$XFS_IO_PROG -c fpunch 25 2 $SCRATCH_MNT/foo
+
+$XFS_IO_PROG -c falloc -k 30 100 $SCRATCH_MNT/foo
+$XFS_IO_PROG -c falloc -k 900 200 $SCRATCH_MNT/foo
+
+_run_btrfs_util_prog subvolume snapshot -r $SCRATCH_MNT $SCRATCH_MNT/mysnap2
+
+_run_btrfs_util_prog send -a $SCRATCH_MNT/mysnap1 -f

[PATCH v2] xfstests: btrfs, add test for btrfs properties

2014-04-16 Thread Filipe David Borba Manana
This test case verifies the btrfs properties feature, a new feature
introduced in the linux kernel version 3.14.

Signed-off-by: Filipe David Borba Manana fdman...@gmail.com
---

V2: Addressed Dave's comments: removed the function that checked for the
existence of the btrfs-progs property command and instead use the existing
function _require_btrfs, which checks whether a btrfs-progs command exists
and is equivalent to what I had before.

 tests/btrfs/048 | 220 
 tests/btrfs/048.out |  78 +++
 tests/btrfs/group   |   1 +
 3 files changed, 299 insertions(+)
 create mode 100755 tests/btrfs/048
 create mode 100644 tests/btrfs/048.out

diff --git a/tests/btrfs/048 b/tests/btrfs/048
new file mode 100755
index 000..e998f97
--- /dev/null
+++ b/tests/btrfs/048
@@ -0,0 +1,220 @@
+#! /bin/bash
+# FS QA Test No. btrfs/048
+#
+# Btrfs properties test. The btrfs properties feature was introduced in the
+# linux kernel 3.14.
+#
+#---
+# Copyright (c) 2014 Filipe Manana.  All Rights Reserved.
+#
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License as
+# published by the Free Software Foundation.
+#
+# This program is distributed in the hope that it would be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write the Free Software Foundation,
+# Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+#---
+#
+
+seq=`basename $0`
+seqres=$RESULT_DIR/$seq
+echo QA output created by $seq
+
+here=`pwd`
+tmp=/tmp/$$
+
+status=1   # failure is the default!
+trap _cleanup; exit \$status 0 1 2 3 15
+
+_cleanup()
+{
+rm -fr $send_files_dir
+rm -fr $tmp
+}
+
+# get standard environment, filters and checks
+. ./common/rc
+. ./common/filter
+
+# real QA test starts here
+_supported_fs btrfs
+_supported_os Linux
+_require_scratch
+_require_btrfs property
+_need_to_be_root
+
+send_files_dir=$TEST_DIR/btrfs-test-$seq
+
+rm -f $seqres.full
+rm -fr $send_files_dir
+mkdir $send_files_dir
+
+_scratch_mkfs /dev/null 21
+_scratch_mount
+
+echo Testing label property
+$BTRFS_UTIL_PROG property get $SCRATCH_MNT label
+echo ***
+$BTRFS_UTIL_PROG property set $SCRATCH_MNT label foobar
+$BTRFS_UTIL_PROG property get $SCRATCH_MNT label
+echo ***
+$BTRFS_UTIL_PROG property get $SCRATCH_MNT
+echo ***
+$BTRFS_UTIL_PROG property set $SCRATCH_MNT label ''
+$BTRFS_UTIL_PROG property get $SCRATCH_MNT label
+echo ***
+mkdir $SCRATCH_MNT/testdir
+$BTRFS_UTIL_PROG property get $SCRATCH_MNT/testdir label
+echo ***
+
+echo -e \nTesting subvolume ro property
+_run_btrfs_util_prog subvolume create $SCRATCH_MNT/sv1
+$BTRFS_UTIL_PROG property get $SCRATCH_MNT/sv1 ro
+echo ***
+$BTRFS_UTIL_PROG property set $SCRATCH_MNT/sv1 ro foo
+echo ***
+$BTRFS_UTIL_PROG property set $SCRATCH_MNT/sv1 ro true
+echo ***
+$BTRFS_UTIL_PROG property get $SCRATCH_MNT/sv1 ro
+echo ***
+touch $SCRATCH_MNT/sv1/foobar 21 | _filter_scratch
+echo ***
+$BTRFS_UTIL_PROG property set $SCRATCH_MNT/sv1 ro false
+touch $SCRATCH_MNT/sv1/foobar 21 | _filter_scratch
+$BTRFS_UTIL_PROG property get $SCRATCH_MNT/sv1
+echo ***
+
+echo -e \nTesting compression property
+mkdir $SCRATCH_MNT/testdir/subdir1
+touch $SCRATCH_MNT/testdir/file1
+$BTRFS_UTIL_PROG property get $SCRATCH_MNT/testdir/file1 compression
+$BTRFS_UTIL_PROG property get $SCRATCH_MNT/testdir/subdir1 compression
+echo ***
+$BTRFS_UTIL_PROG property set $SCRATCH_MNT/testdir/file1 compression \
+   foo 21 | _filter_scratch
+echo ***
+$BTRFS_UTIL_PROG property set $SCRATCH_MNT/testdir/file1 compression lzo
+$BTRFS_UTIL_PROG property get $SCRATCH_MNT/testdir/file1 compression
+
+# Verify property was persisted.
+_scratch_unmount
+_check_scratch_fs
+_scratch_mount
+$BTRFS_UTIL_PROG property get $SCRATCH_MNT/testdir/file1 compression
+$BTRFS_UTIL_PROG property set $SCRATCH_MNT/testdir/file1 compression zlib
+$BTRFS_UTIL_PROG property get $SCRATCH_MNT/testdir/file1 compression
+$BTRFS_UTIL_PROG property set $SCRATCH_MNT/testdir/file1 compression ''
+$BTRFS_UTIL_PROG property get $SCRATCH_MNT/testdir/file1 compression
+
+# Test compression property inheritance.
+echo ***
+$BTRFS_UTIL_PROG property set $SCRATCH_MNT/testdir/subdir1 compression lzo
+$BTRFS_UTIL_PROG property get $SCRATCH_MNT/testdir/subdir1 compression
+echo ***
+mkdir $SCRATCH_MNT/testdir/subdir1/subsubdir
+touch $SCRATCH_MNT/testdir/subdir1/some_file
+$BTRFS_UTIL_PROG property get $SCRATCH_MNT/testdir/subdir1/subsubdir 
compression
+echo ***
+$BTRFS_UTIL_PROG property get $SCRATCH_MNT/testdir/subdir1/some_file 
compression
+echo ***
+mkdir

[PATCH 3/4 v3] Btrfs: send, use fallocate command to punch holes

2014-04-16 Thread Filipe David Borba Manana
Instead of sending a write command with a data buffer filled with 0 value bytes,
use the fallocate command, introduced in the send stream version 2, to tell the
receiver to punch a file hole using the fallocate system call.

Signed-off-by: Filipe David Borba Manana fdman...@gmail.com
---

V2: A v2 stream is now only produced if the send ioctl caller passes in one of
the new flags (BTRFS_SEND_FLAG_CALCULATE_DATA_SIZE |
BTRFS_SEND_FLAG_SUPPORT_FALLOCATE), to avoid breaking old clients.
V3: Added missing path allocation (messed up rebase).

 fs/btrfs/send.c | 55 ---
 fs/btrfs/send.h |  4 
 2 files changed, 56 insertions(+), 3 deletions(-)

diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c
index f5db492..bb9afea 100644
--- a/fs/btrfs/send.c
+++ b/fs/btrfs/send.c
@@ -564,6 +564,7 @@ static int tlv_put(struct send_ctx *sctx, u16 attr, const 
void *data, int len)
return tlv_put(sctx, attr, __tmp, sizeof(__tmp));  \
}
 
+TLV_PUT_DEFINE_INT(32)
 TLV_PUT_DEFINE_INT(64)
 
 static int tlv_put_string(struct send_ctx *sctx, u16 attr,
@@ -4483,18 +4484,59 @@ out:
return ret;
 }
 
+static int send_fallocate(struct send_ctx *sctx, u32 flags,
+ u64 offset, u64 len)
+{
+   struct fs_path *p = NULL;
+   int ret = 0;
+
+   ASSERT(sctx-flags  BTRFS_SEND_FLAG_SUPPORT_FALLOCATE);
+
+   if (sctx-phase == SEND_PHASE_COMPUTE_DATA_SIZE) {
+   sctx-total_data_size += len;
+   return 0;
+   }
+
+   p = fs_path_alloc();
+   if (!p)
+   return -ENOMEM;
+   ret = get_cur_path(sctx, sctx-cur_ino, sctx-cur_inode_gen, p);
+   if (ret  0)
+   goto out;
+
+   ret = begin_cmd(sctx, BTRFS_SEND_C_FALLOCATE);
+   if (ret  0)
+   goto out;
+   TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p);
+   TLV_PUT_U32(sctx, BTRFS_SEND_A_FALLOCATE_FLAGS, flags);
+   TLV_PUT_U64(sctx, BTRFS_SEND_A_FILE_OFFSET, offset);
+   TLV_PUT_U64(sctx, BTRFS_SEND_A_SIZE, len);
+   ret = send_cmd(sctx);
+
+tlv_put_failure:
+out:
+   fs_path_free(p);
+   return ret;
+}
+
 static int send_hole(struct send_ctx *sctx, u64 end)
 {
struct fs_path *p = NULL;
u64 offset = sctx-cur_inode_last_extent;
-   u64 len;
+   u64 len = end - offset;
int ret = 0;
 
if (sctx-phase == SEND_PHASE_COMPUTE_DATA_SIZE) {
-   sctx-total_data_size += end - offset;
+   sctx-total_data_size += len;
return 0;
}
 
+   if (sctx-flags  BTRFS_SEND_FLAG_SUPPORT_FALLOCATE)
+   return send_fallocate(sctx,
+ BTRFS_SEND_PUNCH_HOLE_FALLOC_FLAGS,
+ offset,
+ len);
+
p = fs_path_alloc();
if (!p)
return -ENOMEM;
@@ -4551,7 +4593,8 @@ static int send_write_or_clone(struct send_ctx *sctx,
len = btrfs_file_extent_num_bytes(path-nodes[0], ei);
}
 
-   if (offset + len  sctx-cur_inode_size)
+   if (offset  sctx-cur_inode_size 
+   offset + len  sctx-cur_inode_size)
len = sctx-cur_inode_size - offset;
if (len == 0) {
ret = 0;
@@ -4568,6 +4611,12 @@ static int send_write_or_clone(struct send_ctx *sctx,
ret = send_clone(sctx, offset, len, clone_root);
} else if (sctx-flags  BTRFS_SEND_FLAG_NO_FILE_DATA) {
ret = send_update_extent(sctx, offset, len);
+   } else if (btrfs_file_extent_disk_bytenr(path-nodes[0], ei) == 0 
+  type != BTRFS_FILE_EXTENT_INLINE 
+  (sctx-flags  BTRFS_SEND_FLAG_SUPPORT_FALLOCATE) 
+  offset  sctx-cur_inode_size) {
+   ret = send_fallocate(sctx, BTRFS_SEND_PUNCH_HOLE_FALLOC_FLAGS,
+offset, len);
} else {
while (pos  len) {
l = len - pos;
diff --git a/fs/btrfs/send.h b/fs/btrfs/send.h
index 367030d..a632c0d 100644
--- a/fs/btrfs/send.h
+++ b/fs/btrfs/send.h
@@ -141,6 +141,10 @@ enum {
 #define BTRFS_SEND_A_FALLOCATE_FLAG_KEEP_SIZE   (1  0)
 #define BTRFS_SEND_A_FALLOCATE_FLAG_PUNCH_HOLE  (1  1)
 
+#define BTRFS_SEND_PUNCH_HOLE_FALLOC_FLAGS\
+   (BTRFS_SEND_A_FALLOCATE_FLAG_KEEP_SIZE |  \
+BTRFS_SEND_A_FALLOCATE_FLAG_PUNCH_HOLE)
+
 #ifdef __KERNEL__
 long btrfs_ioctl_send(struct file *mnt_file, void __user *arg);
 #endif
-- 
1.9.1

--
To unsubscribe from this list: send the line unsubscribe linux-btrfs in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


  1   2   3   4   >