[PATCH V2] Btrfs: fix old data problem caused by aio vs dio

2012-06-26 Thread Miao Xie
The 209th case of xfstests failed because of a race between aio and dio. The
detailed reason is as follows:
Task1   Task2   Btrfs-worker
invalidate pages
read pages
do direct io
invalidate pages fail*
finish ordered io
read data from
pages

* This step failed because the kernel found the ordered extent object that
covered the pages and thought the pages were still busy. Task1 then read
the old data from those pages.

This patch fixes the above problem by updating the existing pages directly.

Signed-off-by: Miao Xie mi...@cn.fujitsu.com
---
 fs/btrfs/file.c  |   91 ++
 fs/btrfs/inode.c |1 +
 2 files changed, 92 insertions(+), 0 deletions(-)

diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 876cddd..fc0f485 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -18,6 +18,7 @@
 
 #include linux/fs.h
 #include linux/pagemap.h
+#include linux/pagevec.h
 #include linux/highmem.h
 #include linux/time.h
 #include linux/init.h
@@ -1328,6 +1329,91 @@ static noinline ssize_t __btrfs_buffered_write(struct 
file *file,
return num_written ? num_written : ret;
 }
 
+static void btrfs_dio_update_existed_pages(struct inode *inode,
+  struct iov_iter *it,
+  loff_t pos, size_t size)
+{
+   struct address_space *mapping = inode-i_mapping;
+   struct pagevec pvec;
+   pgoff_t index;
+   pgoff_t end;
+   size_t copied;
+   loff_t copy_pos = pos;
+   int offset = pos  (PAGE_CACHE_SIZE - 1);
+   int i;
+
+   BUG_ON(pos  (PAGE_CACHE_SIZE - 1));
+   BUG_ON(size  (PAGE_CACHE_SIZE - 1));
+
+   pagevec_init(pvec, 0);
+   index = pos  PAGE_CACHE_SHIFT;
+   end = (pos + size - 1)  PAGE_CACHE_SHIFT;
+
+   while (index = end  pagevec_lookup(pvec, mapping, index,
+   min(end - index, (pgoff_t)PAGEVEC_SIZE - 1) + 1)) {
+   i = 0;
+   while (i  pagevec_count(pvec)) {
+   struct page *page = pvec.pages[i];
+   size_t count = min_t(size_t, PAGE_CACHE_SIZE - offset,
+size);
+
+   index = page-index;
+   if (index  end)
+   break;
+
+   lock_page(page);
+   WARN_ON(page-index != index);
+   BUG_ON(PageDirty(page));
+   BUG_ON(PageWriteback(page));
+
+   if (page-mapping != mapping) {
+   unlock_page(page);
+   i++;
+   continue;
+   }
+
+   if (!PageUptodate(page)) {
+   unlock_page(page);
+   i++;
+   continue;
+   }
+
+   if ((index  PAGE_CACHE_SHIFT)  copy_pos) {
+   copied = (index  PAGE_CACHE_SHIFT) - copy_pos;
+   iov_iter_advance(it, copied);
+   offset = 0;
+   size -= copied;
+   count = min_t(size_t, PAGE_CACHE_SIZE, size);
+   }
+
+   pagefault_disable();
+   copied = iov_iter_copy_from_user_atomic(page, it,
+   offset,
+   count);
+   pagefault_enable();
+   flush_dcache_page(page);
+
+   if (copied  count)
+   copied = 0;
+
+   iov_iter_advance(it, copied);
+   size -= copied;
+   copy_pos += copied;
+
+   if (unlikely(copied  PAGE_CACHE_SIZE - offset)) {
+   offset += copied;
+   } else {
+   offset = 0;
+   i++;
+   }
+   unlock_page(page);
+   }
+   pagevec_release(pvec);
+   cond_resched();
+   index++;
+   }
+}
+
 static ssize_t __btrfs_direct_write(struct kiocb *iocb,
const struct iovec *iov,
unsigned long nr_segs, loff_t pos,
@@ -1356,6 +1442,11 @@ static ssize_t __btrfs_direct_write(struct kiocb *iocb,
mark_inode_dirty(inode);
}
 
+   if (written  0) {
+   iov_iter_init(i, iov, nr_segs, count, 0);
+   

[Report] corrupted root csum tree result in segfault.

2012-06-26 Thread Anand Jain




# mkfs.btrfs /dev/sdd && btrfs-debug-tree -R /dev/sdd | egrep checksum

::
checksum tree key (CSUM_TREE ROOT_ITEM 0) 29376512 level 0

# btrfs-corrupt-block -l 29376512 /dev/sdd
mirror 1 logical 29376512 physical 37765120 device /dev/sdd
corrupting 29376512 copy 1
mirror 2 logical 29376512 physical 145113088 device /dev/sdd
corrupting 29376512 copy 2
# btrfsck /dev/sdd
Check tree block failed, want=29376512, have=0
Check tree block failed, want=29376512, have=0
Check tree block failed, want=29376512, have=0
Check tree block failed, want=29376512, have=0
Check tree block failed, want=29376512, have=0
read block failed check_tree_block
Couldn't setup csum tree
checking extents
Check tree block failed, want=29376512, have=0
Check tree block failed, want=29376512, have=0
Check tree block failed, want=29376512, have=0
Check tree block failed, want=29376512, have=0
Check tree block failed, want=29376512, have=0
read block failed check_tree_block   !!
Segmentation fault (core dumped)  --- !!
#



read_tree_block frees buf and returns NULL

struct extent_buffer *read_tree_block(struct btrfs_root *root, u64 bytenr,
 u32 blocksize, u64 parent_transid)
{
::

while (1) {
::
if (ignore) {
if (check_tree_block(root, eb))
printk(read block failed check_tree_block\n); 

else
printk(Csum didn't match\n);
break;
}
::
}
free_extent_buffer(eb);
return NULL;
}


We don't check whether buf is NULL, so btrfsck fails with a
segmentation fault in add_root_to_pending
-
static int check_extents(struct btrfs_trans_handle *trans,
 struct btrfs_root *root, int repair)
{

::
buf = read_tree_block(root-fs_info-tree_root,
  btrfs_root_bytenr(ri),
  btrfs_level_size(root,
   btrfs_root_level(ri)), 0);   
--- buf is NULL

add_root_to_pending(buf, bits, bits_nr, extent_cache,
pending, seen, reada, nodes,
found_key);

free_extent_buffer(buf);
}
---


It seems the simple fix is to direct the btrfsck user to use
--init-csum-tree when the csum root is corrupted.

thanks
Anand

--
To unsubscribe from this list: send the line unsubscribe linux-btrfs in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [patch v2] Btrfs: fix error handling in __add_reloc_root()

2012-06-26 Thread Dan Carpenter
On Tue, Jun 26, 2012 at 06:51:39AM +0530, santosh prasad nayak wrote:
 I am also facing similar issue while applying this patch.
 
 [santosh@localhost linux-next]$ sudo git am mail_Dan.txt
 Patch format detection failed.
 [santosh@localhost linux-next]$
 

The problem is not on my end.  It made it to marc.info without
getting corrupted.  Marc.info strips out the From: and Subject:
headers so you'd have to add them in manually.

1) Save this file:
http://marc.info/?l=kernel-janitors&m=134062314509635&q=raw

2) Edit the file and add these two lines at the top:

From: Dan Carpenter dan.carpen...@oracle.com
Subject: [patch v2] Btrfs: fix error handling in __add_reloc_root()

3) $ git am /home/dcarpenter/tmp/html2/btrfs_raw.txt
Applying: Btrfs: fix error handling in __add_reloc_root()

regards,
dan carpenter


--
To unsubscribe from this list: send the line unsubscribe linux-btrfs in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v2] Btrfs: allow mount -o remount,compress=no

2012-06-26 Thread Arnd Hannemann
Hi Chris,

Am 16.04.2012 16:42, schrieb David Sterba:
 On Mon, Apr 16, 2012 at 03:27:51PM +0200, Arnd Hannemann wrote:
 Btrfs allows to turn on compression on a mounted and used filesystem
 by issuing mount -o remount,compress=lzo.
 This patch allows to turn compression off again
 while the filesystem is mounted. As suggested by David Sterba
 if the compress-force option was set, it is implicitly cleared
 if compression is turned off.

 Signed-off-by: Arnd Hannemann a...@arndnet.de
 
 Tested-by: David Sterba dste...@suse.cz
 
 worked perfectly, remounting back and forth with compress=lzo =no
 -force=lzo etc, checked output via 'mount'.

How should we proceed to get the above-mentioned patch
(or the similar patch from Andrei Popa) merged?

Best regards,
Arnd


--
To unsubscribe from this list: send the line unsubscribe linux-btrfs in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: fio reports data corruption with btrfs

2012-06-26 Thread Alex Lyakas
Hi Josef,
Mount options were noatime, nodatacow.
So you say that fio might have received ENOSPC, but didn't abort the test?

I will compile your branch and let you know.

I did not see any error messages from the kernel, except for:
Jun 25 10:04:28 vc kernel: [  436.730890] btrfs: setting nodatacow
Jun 25 10:04:28 vc kernel: [  436.744139] btrfs: no dev_stats entry
found for device /dev/sdb2 (devid 1) (OK on first mount after mkfs)
Jun 25 10:13:12 vc kernel: [  960.844149] INFO: task
flush-btrfs-2:3349 blocked for more than 120 seconds.
Jun 25 10:13:12 vc kernel: [  960.846600] "echo 0 >
/proc/sys/kernel/hung_task_timeout_secs" disables this message.
Jun 25 10:13:12 vc kernel: [  960.847507] flush-btrfs-2   D
8180ca80 0  3349  2 0x
Jun 25 10:13:12 vc kernel: [  960.847515]  8801186337a0
0046 13e332ba 81c1d780
Jun 25 10:13:12 vc kernel: [  960.847520]  880118633fd8
880118633fd8 880118633fd8 00013840
Jun 25 10:13:12 vc kernel: [  960.847525]  81c13020
8801176f5b80 880118633790 88011fc140e8
Jun 25 10:13:12 vc kernel: [  960.847530] Call Trace:
Jun 25 10:13:12 vc kernel: [  960.847554]  [8166c239]
schedule+0x29/0x70
Jun 25 10:13:12 vc kernel: [  960.847558]  [8166c30f]
io_schedule+0x8f/0xd0
Jun 25 10:13:12 vc kernel: [  960.847574]  [812f0a3f]
get_request_wait+0xef/0x240
Jun 25 10:13:12 vc kernel: [  960.847587]  [81073a80] ?
add_wait_queue+0x60/0x60
Jun 25 10:13:12 vc kernel: [  960.847592]  [812f191f]
blk_queue_bio+0x7f/0x3a0
Jun 25 10:13:12 vc kernel: [  960.847596]  [812ee784]
generic_make_request.part.50+0x74/0xb0
Jun 25 10:13:12 vc kernel: [  960.847600]  [812eef18]
generic_make_request+0x68/0x70
Jun 25 10:13:12 vc kernel: [  960.847603]  [812eefa7]
submit_bio+0x87/0x110
Jun 25 10:13:12 vc kernel: [  960.847649]  [a006f8c7]
btrfs_map_bio+0x167/0x210 [btrfs]
Jun 25 10:13:12 vc kernel: [  960.847669]  [a00428ad]
btrfs_submit_bio_hook+0x7d/0x140 [btrfs]
Jun 25 10:13:12 vc kernel: [  960.847691]  [a00609fa]
submit_one_bio+0x6a/0xa0 [btrfs]
Jun 25 10:13:12 vc kernel: [  960.847713]  [a0061059]
flush_epd_write_bio+0x39/0x50 [btrfs]
Jun 25 10:13:12 vc kernel: [  960.847734]  [a00662c0]
extent_writepages+0x50/0x60 [btrfs]
Jun 25 10:13:12 vc kernel: [  960.847754]  [a0045ba0] ?
btrfs_submit_direct+0x1e0/0x1e0 [btrfs]
Jun 25 10:13:12 vc kernel: [  960.847759]  [81073654] ?
bit_waitqueue+0x14/0xc0
Jun 25 10:13:12 vc kernel: [  960.847779]  [a00436d8]
btrfs_writepages+0x28/0x30 [btrfs]
Jun 25 10:13:12 vc kernel: [  960.847793]  [81128191]
do_writepages+0x21/0x40
Jun 25 10:13:12 vc kernel: [  960.847805]  [811a5462]
writeback_single_inode+0x112/0x380
Jun 25 10:13:12 vc kernel: [  960.847809]  [811a5886]
writeback_sb_inodes+0x1b6/0x270
Jun 25 10:13:12 vc kernel: [  960.847813]  [811a59de]
__writeback_inodes_wb+0x9e/0xd0
Jun 25 10:13:12 vc kernel: [  960.847816]  [811a5c9b]
wb_writeback+0x28b/0x340
Jun 25 10:13:12 vc kernel: [  960.847823]  [810125c7] ?
__switch_to+0x137/0x410
Jun 25 10:13:12 vc kernel: [  960.847833]  [81197d02] ?
get_nr_dirty_inodes+0x52/0x80
Jun 25 10:13:12 vc kernel: [  960.847837]  [811a5def]
wb_check_old_data_flush+0x9f/0xb0
Jun 25 10:13:12 vc kernel: [  960.847842]  [811a72c9]
wb_do_writeback+0x149/0x1d0
Jun 25 10:13:12 vc kernel: [  960.847848]  [8105f610] ?
usleep_range+0x50/0x50
Jun 25 10:13:12 vc kernel: [  960.847852]  [811a73db]
bdi_writeback_thread+0x8b/0x290
Jun 25 10:13:12 vc kernel: [  960.847855]  [811a7350] ?
wb_do_writeback+0x1d0/0x1d0
Jun 25 10:13:12 vc kernel: [  960.847860]  [81072fe3]
kthread+0x93/0xa0
Jun 25 10:13:12 vc kernel: [  960.847868]  [81676be4]
kernel_thread_helper+0x4/0x10
Jun 25 10:13:12 vc kernel: [  960.847873]  [81072f50] ?
kthread_freezable_should_stop+0x70/0x70
Jun 25 10:13:12 vc kernel: [  960.847877]  [81676be0] ?
gs_change+0x13/0x13

Thanks,
Alex.



On Mon, Jun 25, 2012 at 10:26 PM, Josef Bacik jba...@fusionio.com wrote:
 On Mon, Jun 25, 2012 at 12:30:34PM -0600, Alex Lyakas wrote:
 Greetings everybody,

 I am running a fio test on btrfs compiled from
 git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs.git,
 up to commit:
 cb77fcd88569cd2b7b25ecd4086ea932a53be9b3 Btrfs: delay iput with async extents
 including this commit.

 Below is a fio configuration file, and later fio textual output.
 Here:
 https://docs.google.com/folder/d/0B1AuaIB8xZtbNTRuSW1zVGozWFE/edit
 are expected vs received mismatch reports. Strangely, when I read
 the mismatched block from the file reported as corrupted by fio, I
 receive data different both from expected and received blocks that
 fio reports. I added one such file (job0.1.0.88576.now) to the
 pastebin as well.

 If you think that my fio configuration file is faulty, 

Re: [PATCH] Btrfs: fix wrong check during log recovery

2012-06-26 Thread David Sterba
On Tue, Jun 26, 2012 at 11:59:09AM +0800, Liu Bo wrote:
 When we're evicting an inode during log recovery, we need to ensure that the 
 inode
 is not in orphan state any more, which means inode's run_time flags has _no_
 BTRFS_INODE_HAS_ORPHAN_ITEM.  Thus, the BUG_ON was triggered because of a 
 wrong
 check for the flags.

Right. I looked into the original commit 8a35d95ff4680a45, this was the
only case where the conversion to the atomics was reversed. Thanks for
the quick fix!

Patch should go to 3.5-rc

 
 Signed-off-by: Liu Bo liubo2...@cn.fujitsu.com
Reviewed-by: David Sterba dste...@suse.cz
--
To unsubscribe from this list: send the line unsubscribe linux-btrfs in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: New btrfs-progs integration branch

2012-06-26 Thread Hugo Mills
On Tue, Jun 26, 2012 at 11:58:41AM +0300, Alex Lyakas wrote:
 Hi Hugo,
 forgive me, but I am somewhat confused.
 What is the main repo of btrfs-progs, if there is such thing?
 I see patches coming in, but no updates to
 git://git.kernel.org/pub/scm/linux/kernel/git/mason/btrfs-progs.git,
 which I thought was the one.
 
 Can you pls clarify where should I pull updates from for btrfs-progs?

   The official source for btrfs-progs is Chris's one, at the URL
above. The integration repo is kind of a staging area where I pull in
as many patches as I can and get them a bit more visibility. We don't
really have a well-defined workflow here.

   It depends on what you intend doing: if you want to make packages
for your distribution, use Chris's repo. If you want something
reasonably stable and tested, use Chris's repo. If there's some
experimental kernel feature you want to test out, use integration. If
you want to be helpful and test out new patches and report problems
with them, use integration.

   Hugo.

 Thanks,
 Alex.
 
 
 
 On Tue, Jun 5, 2012 at 10:09 PM, Hugo Mills h...@carfax.org.uk wrote:
    I've just pushed out a new integration branch to my git repo. This
  is purely bugfix patches -- there are no new features in this issue of
  the integration branch. I've got a stack of about a dozen more patches
  with new features in them still to go. I'll be working on those
  tomorrow. As always, there's minimal testing involved here, but it
  does at least compile on my system(*).
 
    The branch is fetchable with git from:
 
  http://git.darksatanic.net/repo/btrfs-progs-unstable.git/ 
  integration-20120605
 
    And viewable in human-readable form at:
 
  http://git.darksatanic.net/cgi/gitweb.cgi?p=btrfs-progs-unstable.git
 
    Shortlog is below.
 
    Hugo.
 
  (*) I don't care about works-on-my-machine. We are not shipping your
  machine!
 
  
 
  Akira Fujita (1):
       Btrfs-progs: Fix manual of btrfs command
 
  Chris Samuel (1):
       Fix set-dafault typo in cmds-subvolume.c
 
  Csaba Tóth (1):
       mkfs.btrfs on ARM
 
  Goffredo Baroncelli (1):
       scrub_fs_info( ) file handle leaking
 
  Hubert Kario (2):
       Fix segmentation fault when opening invalid file system
       man: fix btrfs man page formatting
 
  Jan Kara (1):
       mkfs: Handle creation of filesystem larger than the first device
 
  Jim Meyering (5):
       btrfs_scan_one_dir: avoid use-after-free on error path
       mkfs: use strdup in place of strlen,malloc,strcpy sequence
       restore: don't corrupt stack for a zero-length command-line argument
       avoid several strncpy-induced buffer overruns
       mkfs: avoid heap-buffer-read-underrun for zero-length size arg
 
  Josef Bacik (3):
       Btrfs-progs: make btrfsck aware of free space inodes
       Btrfs-progs: make btrfs filesystem show uuid actually work
       btrfs-progs: enforce block count on all devices in mkfs
 
  Miao Xie (3):
       Btrfs-progs: fix btrfsck's snapshot wrong unresolved refs
       Btrfs-progs, btrfs-corrupt-block: fix the wrong usage
       Btrfs-progs, btrfs-map-logical: Fix typo in usage
 
  Phillip Susi (2):
       btrfs-progs: removed extraneous whitespace from mkfs man page
       btrfs-progs: document --rootdir mkfs switch
 
  Sergei Trofimovich (2):
       Makefile: use $(CC) as a compilers instead of $(CC)/gcc
       Makefile: use $(MAKE) instead of hardcoded 'make'
 
  Shawn Bohrer (1):
       btrfs-progs: Update resize documentation
 
  Wang Sheng-Hui (1):
       btrfs-progs: cleanup: remove the redundant BTRFS_CSUM_TYPE_CRC32 macro 
  def
 

-- 
=== Hugo Mills: hugo@... carfax.org.uk | darksatanic.net | lug.org.uk ===
  PGP key: 515C238D from wwwkeys.eu.pgp.net or http://www.carfax.org.uk
  --- ...  one ping(1) to rule them all, and in the ---  
 darkness bind(2) them.  


signature.asc
Description: Digital signature


Re: how to cleanup old superblock

2012-06-26 Thread David Sterba
On Tue, Jun 26, 2012 at 10:53:24AM +0200, Xavier Nicollet wrote:
 Le 26 June 2012 à 00:49, David Sterba a écrit:
  On Mon, Jun 25, 2012 at 11:54:50PM +0400, Dmitry MiksIr wrote:
   Hello!
   Long time ago I created btrfs on /dev/sda
   After some changes btrfs moved to /dev/sda1 (well, to md, and sda1 is part
   of md).
   
   As result, btrfs fi show show me 2 filesystems: new one and old one.
   Probably I need to do some cleaning. Can someone tell me what to do.
  
  quick aid is to run this command:
  
  dd if=/dev/zero of=/dev/sda bs=1k count=4 seek=64
  
  (deletes just the btrfs superblock and will not touch anything
  else)
 
 Updated the wiki: 
 https://btrfs.wiki.kernel.org/index.php/Problem_FAQ#How_to_clean_up_old_superblock_.3F

Thanks. I redid the calculations and the statement that it 'will not
touch anything else' may not be correct in rare cases.

Depends where the first partition starts. If it's at sector 63, like
fdisk created by default in the past, overwriting the offset 64k
(relative to /dev/sda) is actually

offset = 64*1024 - 63*512 = 33280

relative to /dev/sda1.

If there are metadata (eg. raid, other fs) at this offset, then it can
cause problems. For safety reasons, a btrfs filesystem does not touch
first megabyte for allocations, so this does not clash. I'm not sure
about md-raid, but if the sda:64k block is used, then it would be
overwritten anyway.

Newer fdisks start the first partition at 1M, so it's safe.

Wiki page updated.

For the reference - this feature belongs to progs, discussed here
http://thread.gmane.org/gmane.comp.file-systems.btrfs/17065


david
--
To unsubscribe from this list: send the line unsubscribe linux-btrfs in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: how to cleanup old superblock

2012-06-26 Thread Sander
David Sterba wrote (ao):
 Thanks. I redid the calculations and the statement that it 'will not
 touch anything else' may not be correct in rare cases.

What about wipefs?

wipefs allows to erase filesystem or raid signatures (magic strings)
from the device to make the filesystem invisible for libblkid. wipefs
does not erase the whole filesystem or any other data from the device.
When used without options -a or -o, it lists all visible filesystems and
offsets of their signatures.

Sander
--
To unsubscribe from this list: send the line unsubscribe linux-btrfs in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] Btrfs: use _IOR for BTRFS_IOC_SUBVOL_GETFLAGS

2012-06-26 Thread David Sterba
On Mon, Jun 25, 2012 at 11:36:12PM +0200, Alexander Block wrote:
 We used the wrong ioctl macro for the getflags ioctl before.
 As we don't have the set/getflags ioctls in the user space ioctl.h
 at the moment, it's safe to fix it now.

The _IOW is clearly wrong here, the direction is 'from kernel', so _IOR.
I hope it's yet ok to change it, though it's a user visible change.

 
 Signed-off-by: Alexander Block abloc...@googlemail.com
Reviewed-by: David Sterba dste...@suse.cz

 ---
  fs/btrfs/ioctl.h |2 +-
  1 file changed, 1 insertion(+), 1 deletion(-)
 
 diff --git a/fs/btrfs/ioctl.h b/fs/btrfs/ioctl.h
 index 497c530..e440aa6 100644
 --- a/fs/btrfs/ioctl.h
 +++ b/fs/btrfs/ioctl.h
 @@ -339,7 +339,7 @@ struct btrfs_ioctl_get_dev_stats {
  #define BTRFS_IOC_WAIT_SYNC  _IOW(BTRFS_IOCTL_MAGIC, 22, __u64)
  #define BTRFS_IOC_SNAP_CREATE_V2 _IOW(BTRFS_IOCTL_MAGIC, 23, \
  struct btrfs_ioctl_vol_args_v2)
 -#define BTRFS_IOC_SUBVOL_GETFLAGS _IOW(BTRFS_IOCTL_MAGIC, 25, __u64)
 +#define BTRFS_IOC_SUBVOL_GETFLAGS _IOR(BTRFS_IOCTL_MAGIC, 25, __u64)
  #define BTRFS_IOC_SUBVOL_SETFLAGS _IOW(BTRFS_IOCTL_MAGIC, 26, __u64)
  #define BTRFS_IOC_SCRUB _IOWR(BTRFS_IOCTL_MAGIC, 27, \
 struct btrfs_ioctl_scrub_args)
 -- 
 1.7.10
 
 --
 To unsubscribe from this list: send the line unsubscribe linux-btrfs in
 the body of a message to majord...@vger.kernel.org
 More majordomo info at  http://vger.kernel.org/majordomo-info.html
 
--
To unsubscribe from this list: send the line unsubscribe linux-btrfs in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: 3.5-rc4: BTRFS unmountable after hard lockup

2012-06-26 Thread Martin Steigerwald
Am Dienstag, 26. Juni 2012 schrieb Liu Bo:
 On 06/26/2012 06:18 AM, David Sterba wrote:
  3756 if (root-fs_info-log_root_recovering) {
  3757 BUG_ON(!test_bit(BTRFS_INODE_HAS_ORPHAN_ITEM,
  3758 BTRFS_I(inode)-runtime_flags)); 
  3759 goto no_delete;
  3760 }
  
  and it happened during log replay, as you found already, fixable by
  running the zero-log utility. Another way is to mount read-only, this
  skips log replay.
  
  I think there could be a logic error, as this probably happens only
  during log replay when the orphan bit is not in sync with link count,
  but I saw that this should be handled in the fixup_inode_link_counts
  call path. CCing Josef, if he has an idea.
 
 It is a logic error, but mostly a finger wrong from Josef IMO... :)
 
 I'll send a patch for it.

Thanks for looking into it. 

Since my BTRFS is up and running again I can´t test a patch easily however.
I´d have to unplug the disk or crash my laptop several times to trigger it
again I bet.

-- 
Martin 'Helios' Steigerwald - http://www.Lichtvoll.de
GPG: 03B0 0D6C 0040 0710 4AFA  B82F 991B EAAC A599 84C7
--
To unsubscribe from this list: send the line unsubscribe linux-btrfs in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] Btrfs: return error of btrfs_update_inode() to caller

2012-06-26 Thread David Sterba
On Tue, Jun 26, 2012 at 12:25:22PM +0900, Tsutomu Itoh wrote:
 We didn't check error of btrfs_update_inode(), but that error looks
 easy to bubble back up.

Yep, the callers check for the retvals, and the one missed in
__btrfs_unlink_inode seems serious, as unlink_inode and rename would not
see the error (probably ENOSPC).

Reviewed-by: David Sterba dste...@suse.cz
--
To unsubscribe from this list: send the line unsubscribe linux-btrfs in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: 3.5-rc4: BTRFS unmountable after hard lockup

2012-06-26 Thread Josef Bacik
On Mon, Jun 25, 2012 at 09:47:33PM -0600, Liu Bo wrote:
 On 06/26/2012 06:18 AM, David Sterba wrote:
 
  3756 if (root-fs_info-log_root_recovering) {
  3757 BUG_ON(!test_bit(BTRFS_INODE_HAS_ORPHAN_ITEM,
  3758  BTRFS_I(inode)-runtime_flags));
  3759 goto no_delete;
  3760 }
  
  and it happened during log replay, as you found already, fixable by
  running the zero-log utility. Another way is to mount read-only, this
  skips log replay.
  
  I think there could be a logic error, as this probably happens only
  during log replay when the orphan bit is not in sync with link count,
  but I saw that this should be handled in the fixup_inode_link_counts
  call path. CCing Josef, if he has an idea.
  
 
 
 It is a logic error, but mostly a finger wrong from Josef IMO... :)
 
 I'll send a patch for it.

Heh oops, sorry about that ;),

Josef
--
To unsubscribe from this list: send the line unsubscribe linux-btrfs in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH V2] Btrfs: fix old data problem caused by aio vs dio

2012-06-26 Thread Josef Bacik
On Tue, Jun 26, 2012 at 12:07:17AM -0600, Miao Xie wrote:
 The 209th case of xfstests failed because of the race between aio and dio. The
 detail reason is following:
   Task1   Task2   Btrfs-worker
   invalidate pages
   read pages
   do direct io
   invalidate pages fail*
   finish ordered io
   read data from
   pages
 

This just papers over the problem and makes DIO touch page cache which it
shouldn't be doing if it's working properly, so NAK.  We need to figure out why
exactly my patch didn't work, since it should be working.  The write should be
doing

lock_extent
setup ordered extent
unlock_extent

and the read should be doing

lock_extent
check for ordered extent
if there is one unlock and wait and then loop
do read
unlock_extent

there should be no room for races in here.  The patch I sent earlier should have
caught if we had done a read between the invalidate and the locking and should
be invalidating the range again and then checking.  If this isn't working then
something else is going sideways and we really need to figure out what it is
rather than just working around the issue, as it will likely bite us in a
different way later.  Thanks,

Josef
--
To unsubscribe from this list: send the line unsubscribe linux-btrfs in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] Btrfs: check return value of btrfs_set_extent_delalloc()

2012-06-26 Thread David Sterba
On Tue, Jun 26, 2012 at 12:22:10PM +0900, Tsutomu Itoh wrote:
 btrfs_set_extent_delalloc() has the possibility of returning the error.
 So I add the code in which the return value of btrfs_set_extent_delalloc()
 is checked.

The caller is cluster_pages_for_defrag, the only error I see returned
via __set_extent_bit is -EEXIST, other errors BUG directly or via
extent_io_tree_panic() .

If the error happens, then the [page_start,page_end-1] is already set
for delalloc, that's probably a bug and should be caught.

There are two more unchecked calls to btrfs_set_extent_delalloc:

inode.c:btrfs_writepage_fixup_worker

1729 btrfs_set_extent_delalloc(inode, page_start, page_end, 
cached_state);
1730 ClearPageChecked(page);
1731 set_page_dirty(page);

IIRC from the days full of fixup worker fun, the reason why this is safe
to ignore is because the call chain leading here is exactly due to missing
delalloc bits on the page.

relocation.c:relocate_file_extent_cluster

3034 btrfs_set_extent_delalloc(inode, page_start, page_end, 
NULL);
3035 set_page_dirty(page);

hmm relocation ... :)


Anyway, I'd like to let Josef take another look at your patch.

 
 Signed-off-by: Tsutomu Itoh t-i...@jp.fujitsu.com
 ---
  fs/btrfs/ioctl.c |7 +--
  1 files changed, 5 insertions(+), 2 deletions(-)
 
 diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
 index 0e92e57..95e27d6 100644
 --- a/fs/btrfs/ioctl.c
 +++ b/fs/btrfs/ioctl.c
 @@ -1016,13 +1016,16 @@ again:
   }
  
  
 - btrfs_set_extent_delalloc(inode, page_start, page_end - 1,
 -   cached_state);
 + ret = btrfs_set_extent_delalloc(inode, page_start, page_end - 1,
 + cached_state);
  
   unlock_extent_cached(BTRFS_I(inode)-io_tree,
page_start, page_end - 1, cached_state,
GFP_NOFS);
  
 + if (ret)
 + goto out;
 +
   for (i = 0; i  i_done; i++) {
   clear_page_dirty_for_io(pages[i]);
   ClearPageChecked(pages[i]);
 
 --
 To unsubscribe from this list: send the line unsubscribe linux-btrfs in
 the body of a message to majord...@vger.kernel.org
 More majordomo info at  http://vger.kernel.org/majordomo-info.html
 
--
To unsubscribe from this list: send the line unsubscribe linux-btrfs in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH] Btrfs: fix dio write vs buffered read race V2

2012-06-26 Thread Josef Bacik
From: Josef Bacik jo...@redhat.com

Miao pointed out there's a problem with mixing dio writes and buffered
reads.  If the read happens between us invalidating the page range and
actually locking the extent we can bring in pages into page cache.  Then
once the write finishes if somebody tries to read again it will just find
uptodate pages and we'll read stale data.  So we need to lock the extent and
check for uptodate bits in the range.  If there are uptodate bits we need to
unlock and invalidate again.  This will keep this race from happening since
we will hold the extent locked until we create the ordered extent, and then
the read side always waits for ordered extents.  Thanks,

Signed-off-by: Josef Bacik jo...@redhat.com
---
V1->V2
-Use invalidate_inode_pages2_range since it will actually unmap existing pages
-Do a filemap_write_and_wait_range in case of mmap
 fs/btrfs/inode.c |   42 +++---
 1 files changed, 39 insertions(+), 3 deletions(-)

diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 9d8c45d..a430549 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -6360,12 +6360,48 @@ static ssize_t btrfs_direct_IO(int rw, struct kiocb 
*iocb,
 */
ordered = btrfs_lookup_ordered_range(inode, lockstart,
 lockend - lockstart + 1);
-   if (!ordered)
+
+   /*
+* We need to make sure there are no buffered pages in this
+* range either, we could have raced between the invalidate in
+* generic_file_direct_write and locking the extent.  The
+* invalidate needs to happen so that reads after a write do not
+* get stale data.
+*/
+   if (!ordered  (!writing ||
+   !test_range_bit(BTRFS_I(inode)-io_tree,
+   lockstart, lockend, EXTENT_UPTODATE, 0,
+   cached_state)))
break;
+
unlock_extent_cached(BTRFS_I(inode)-io_tree, lockstart, 
lockend,
 cached_state, GFP_NOFS);
-   btrfs_start_ordered_extent(inode, ordered, 1);
-   btrfs_put_ordered_extent(ordered);
+
+   if (ordered) {
+   btrfs_start_ordered_extent(inode, ordered, 1);
+   btrfs_put_ordered_extent(ordered);
+   } else {
+   /* Screw you mmap */
+   ret = filemap_write_and_wait_range(file-f_mapping,
+  lockstart,
+  lockend);
+   if (ret)
+   goto out;
+
+   /*
+* If we found a page that couldn't be invalidated just
+* fall back to buffered.
+*/
+   ret = invalidate_inode_pages2_range(file-f_mapping,
+   lockstart  PAGE_CACHE_SHIFT,
+   lockend  PAGE_CACHE_SHIFT);
+   if (ret) {
+   if (ret == -EBUSY)
+   ret = 0;
+   goto out;
+   }
+   }
+
cond_resched();
}
 
-- 
1.7.7.6

--
To unsubscribe from this list: send the line unsubscribe linux-btrfs in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] Btrfs: check return value of btrfs_set_extent_delalloc()

2012-06-26 Thread Josef Bacik
On Tue, Jun 26, 2012 at 07:26:53AM -0600, David Sterba wrote:
 On Tue, Jun 26, 2012 at 12:22:10PM +0900, Tsutomu Itoh wrote:
  btrfs_set_extent_delalloc() has the possibility of returning the error.
  So I add the code in which the return value of btrfs_set_extent_delalloc()
  is checked.
 
 The caller is cluster_pages_for_defrag, the only error I see returned
 via __set_extent_bit is -EEXIST, other errors BUG directly or via
 extent_io_tree_panic() .
 
 If the error happens, then the [page_start,page_end-1] is already set
 for delalloc, that's probably a bug and should be caught.
 
 There are two more unchecked calls to btrfs_set_extent_delalloc:
 
 inode.c:btrfs_writepage_fixup_worker
 
 1729 btrfs_set_extent_delalloc(inode, page_start, page_end, 
 cached_state);
 1730 ClearPageChecked(page);
 1731 set_page_dirty(page);
 
 IIRC from the days full of fixup worker fun, the reason why this is safe
 to ignore is because the call chain leading here is exactly due to missing
 delalloc bits on the page.
 
 relocation.c:relocate_file_extent_cluster
 
 3034 btrfs_set_extent_delalloc(inode, page_start, page_end, 
 NULL);
 3035 set_page_dirty(page);
 
 hmm relocation ... :)
 
 
 Anyway, I'd like to let Josef take another look at your patch.


Even better, we clear delalloc right before doing the set and we have the pages
locked, so there's no way we can get EEXIST here, so I'll just drop this patch.
Thanks Dave,

Josef 
--
To unsubscribe from this list: send the line unsubscribe linux-btrfs in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 1/2] Btrfs: restore restriper state on all mounts

2012-06-26 Thread David Sterba
On Fri, Jun 22, 2012 at 09:24:12PM +0300, Ilya Dryomov wrote:
 Fix a bug that triggered asserts in btrfs_balance() in both normal and
 resume modes -- restriper state was not properly restored on read-only
 mounts.  This factors out resuming code from btrfs_restore_balance(),
 which is now also called earlier in the mount sequence to avoid the
 problem of some early writes getting the old profile.
 
 Signed-off-by: Ilya Dryomov idryo...@gmail.com
 ---
 diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
 index 77872da..dae7cd6 100644
 --- a/fs/btrfs/disk-io.c
 +++ b/fs/btrfs/disk-io.c
 @@ -2492,9 +2497,6 @@ retry_root_backup:
   err = btrfs_orphan_cleanup(fs_info-tree_root);
   up_read(fs_info-cleanup_work_sem);
  
 - if (!err)
 - err = btrfs_recover_balance(fs_info-tree_root);
 -
   if (err) {
   close_ctree(tree_root);
   return err;
 @@ -2518,6 +2520,9 @@ fail_cleaner:
  fail_block_groups:
   btrfs_free_block_groups(fs_info);
  
 +fail_balance_ctl:
 + kfree(fs_info-balance_ctl);

I think you need to set fs_info->balance_ctl to NULL, otherwise this
could lead to double free from free_fs_info. I was looking along the
call paths and didn't see free_fs_info called on the mount failure path:

vfs-mount
  btrfs_mount
btrfs_fill_super
  open_ctree
(recover balance fails, frees ctl)

error is propagated back to vfs, no other fs callback is done (like
kill_super which does call free_fs_info).

The only exit path that is not going through free_fs_info is after error
from btrfs_fill_super, and this can fail from various reasons.

Either I'm missing something, or we leak a btrfs_fs_info every time a
mount fails ...


Back to your patch, apart from the balance_ctl pointer reset, both are
ok and given the number of bug reports [useless padding text here]

  this should go to 3.5-rc.


david
--
To unsubscribe from this list: send the line unsubscribe linux-btrfs in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: btrfs deadlock in 3.5-rc3

2012-06-26 Thread Stefan Priebe

Am 25.06.2012 22:23, schrieb Josef Bacik:

On Mon, Jun 25, 2012 at 02:20:31PM -0600, Stefan Priebe wrote:

Am 25.06.2012 22:11, schrieb Josef Bacik:

On Mon, Jun 25, 2012 at 01:33:09PM -0600, Stefan Priebe wrote:

With v3.4 the same. Can't go back more as this really results in very
fast corruption. Any ideas how to debug?



What workload are you running?  I have a ssd here with discard support I can try
and reproduce on.  Thanks,


i'm using fio with 50 jobs and randwrite of 4k blocks in ceph but i
don't know which load ceph then exactly generates. ;-(



That's fine, I have this handy "create a local ceph cluster" script from an
earlier problem, just send me your fio job and I'll run it locally.  Thanks,


Were you able to find anything? Can I do more or different testing?

Stefan
--
To unsubscribe from this list: send the line unsubscribe linux-btrfs in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 1/2] Btrfs: restore restriper state on all mounts

2012-06-26 Thread Ilya Dryomov
First of all, thanks for reviewing!

On Tue, Jun 26, 2012 at 06:17:39PM +0200, David Sterba wrote:
 On Fri, Jun 22, 2012 at 09:24:12PM +0300, Ilya Dryomov wrote:
  Fix a bug that triggered asserts in btrfs_balance() in both normal and
  resume modes -- restriper state was not properly restored on read-only
  mounts.  This factors out resuming code from btrfs_restore_balance(),
  which is now also called earlier in the mount sequence to avoid the
  problem of some early writes getting the old profile.
  
  Signed-off-by: Ilya Dryomov idryo...@gmail.com
  ---
  diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
  index 77872da..dae7cd6 100644
  --- a/fs/btrfs/disk-io.c
  +++ b/fs/btrfs/disk-io.c
  @@ -2492,9 +2497,6 @@ retry_root_backup:
  err = btrfs_orphan_cleanup(fs_info-tree_root);
  up_read(fs_info-cleanup_work_sem);
   
  -   if (!err)
  -   err = btrfs_recover_balance(fs_info-tree_root);
  -
  if (err) {
  close_ctree(tree_root);
  return err;
  @@ -2518,6 +2520,9 @@ fail_cleaner:
   fail_block_groups:
  btrfs_free_block_groups(fs_info);
   
  +fail_balance_ctl:
  +   kfree(fs_info-balance_ctl);
 
 I think you need to set fs_info-balance_ctl to NULL, otherwise this
 could lead to double free from free_fs_info. I was looking along the

Yes, I do.  I meant to call unset_balance_control(fs_info) there, but
changed it to kfree(), because of the BUG_ON() in the former.

unset_balance_control(), of course, sets ->balance_ctl to NULL ;)

 call paths and didn't see free_fs_info called on the mount failure path:
 
 vfs-mount
   btrfs_mount
 btrfs_fill_super
   open_ctree
 (recover balance fails, frees ctl)
 
 error is propagated back to vfs, no other fs callback is done (like
 kill_super which does call free_fs_info).
 
 The only exit path that is not going through free_fs_info is after error
 from btrfs_fill_super, and this can fail from various reasons.
 
 Either I'm missing something, or we leak a btrfs_fs_info every time a
 mount fails ...

No, we don't, you just missed it.  It's freed from btrfs_kill_super(),
which is called from deactivate_locked_super() after btrfs_fill_super()
errors out.

 
 
 Back to your patch, apart from the balance_ctl pointer reset, both are
 ok and given the number of bug reports [useless padding text here]
 
   this should go to 3.5-rc.

Thanks,

Ilya
--
To unsubscribe from this list: send the line unsubscribe linux-btrfs in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: System Policy for Filenames

2012-06-26 Thread Aaron Peterson
Billy,

Thank you! I will look into FUSE.

Ultimately, I want my / to be mounted with these rules,  I will need a
boot loader to be able to handle it.

 I am wondering if filesystem software has hooks for AppArmor or
SELinux, or some other Linux Security Module would be appropriate to
add to filesystem code?

 Also, I tried joining a linux-fsdev mailing list, but it appears to be defunct.

Oh, This is interesting:
http://www.dwheeler.com/essays/fixing-unix-linux-filenames.html


 -AP


 On Tue, Jun 26, 2012 at 11:03 AM, Billy Crook billycr...@gmail.com wrote:
 On Fri, Jun 22, 2012 at 4:06 AM, Aaron Peterson
 myusualnickn...@gmail.com wrote:
  I would like to make a system policy that restricts the characters
 used in a filename, tests filenames by regular expression, and
 enforces case-insensitive-compatible exclusivity.

 Where should I start?

 I would find a filesystem-agnostic mailinglist.  None of these
 objectives appear to have anything to do with btrfs.  There is
 probably a generic fs layer that would be the appropriate place for
 this, if not FUSE.

 You might also consider forcing users to access the fs through SAMBA
 which has similar capabilities already.
--
To unsubscribe from this list: send the line unsubscribe linux-btrfs in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: New btrfs-progs integration branch

2012-06-26 Thread Alex Lyakas
Thanks, Hugo.
At this point I mostly want to learn and stay up-to-date with new
patches coming in.

Alex.



On Tue, Jun 26, 2012 at 12:58 PM, Hugo Mills h...@carfax.org.uk wrote:
 On Tue, Jun 26, 2012 at 11:58:41AM +0300, Alex Lyakas wrote:
 Hi Hugo,
 forgive me, but I am somewhat confused.
 What is the main repo of btrfs-progs, if there is such thing?
 I see patches coming in, but no updates to
 git://git.kernel.org/pub/scm/linux/kernel/git/mason/btrfs-progs.git,
 which I thought was the one.

 Can you pls clarify where should I pull updates from for btrfs-progs?

   The official source for btrfs-progs is Chris's one, at the URL
 above. The integration repo is kind of a staging area where I pull in
 as many patches as I can and get them a bit more visibility. We don't
 really have a well-defined workflow here.

   It depends on what you intend doing: if you want to make packages
 for your distribution, use Chris's repo. If you want something
 reasonably stable and tested, use Chris's repo. If there's some
 experimental kernel feature you want to test out, use integration. If
 you want to be helpful and test out new patches and report problems
 with them, use integration.

   Hugo.

 Thanks,
 Alex.



 On Tue, Jun 5, 2012 at 10:09 PM, Hugo Mills h...@carfax.org.uk wrote:
    I've just pushed out a new integration branch to my git repo. This
  is purely bugfix patches -- there are no new features in this issue of
  the integration branch. I've got a stack of about a dozen more patches
  with new features in them still to go. I'll be working on those
  tomorrow. As always, there's minimal testing involved here, but it
  does at least compile on my system(*).
 
    The branch is fetchable with git from:
 
  http://git.darksatanic.net/repo/btrfs-progs-unstable.git/ 
  integration-20120605
 
    And viewable in human-readable form at:
 
  http://git.darksatanic.net/cgi/gitweb.cgi?p=btrfs-progs-unstable.git
 
    Shortlog is below.
 
    Hugo.
 
  (*) I don't care about works-on-my-machine. We are not shipping your
  machine!
 
  
 
  Akira Fujita (1):
       Btrfs-progs: Fix manual of btrfs command
 
  Chris Samuel (1):
       Fix set-dafault typo in cmds-subvolume.c
 
  Csaba Tóth (1):
       mkfs.btrfs on ARM
 
  Goffredo Baroncelli (1):
       scrub_fs_info( ) file handle leaking
 
  Hubert Kario (2):
       Fix segmentation fault when opening invalid file system
       man: fix btrfs man page formatting
 
  Jan Kara (1):
       mkfs: Handle creation of filesystem larger than the first device
 
  Jim Meyering (5):
       btrfs_scan_one_dir: avoid use-after-free on error path
       mkfs: use strdup in place of strlen,malloc,strcpy sequence
       restore: don't corrupt stack for a zero-length command-line argument
       avoid several strncpy-induced buffer overruns
       mkfs: avoid heap-buffer-read-underrun for zero-length size arg
 
  Josef Bacik (3):
       Btrfs-progs: make btrfsck aware of free space inodes
       Btrfs-progs: make btrfs filesystem show uuid actually work
       btrfs-progs: enforce block count on all devices in mkfs
 
  Miao Xie (3):
       Btrfs-progs: fix btrfsck's snapshot wrong unresolved refs
       Btrfs-progs, btrfs-corrupt-block: fix the wrong usage
       Btrfs-progs, btrfs-map-logical: Fix typo in usage
 
  Phillip Susi (2):
       btrfs-progs: removed extraneous whitespace from mkfs man page
       btrfs-progs: document --rootdir mkfs switch
 
  Sergei Trofimovich (2):
       Makefile: use $(CC) as a compilers instead of $(CC)/gcc
       Makefile: use $(MAKE) instead of hardcoded 'make'
 
  Shawn Bohrer (1):
       btrfs-progs: Update resize documentation
 
  Wang Sheng-Hui (1):
       btrfs-progs: cleanup: remove the redundant BTRFS_CSUM_TYPE_CRC32 
  macro def
 

 --
 === Hugo Mills: hugo@... carfax.org.uk | darksatanic.net | lug.org.uk ===
  PGP key: 515C238D from wwwkeys.eu.pgp.net or http://www.carfax.org.uk
          --- ...  one ping(1) to rule them all, and in the ---
                         darkness bind(2) them.
--
To unsubscribe from this list: send the line unsubscribe linux-btrfs in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


btrfs oops with kernel 3.5-rc4

2012-06-26 Thread Nathan A. Mourey II
Oops with kernel v3.5-rc4.  This error produced while:

emerge --sync && emerge --update --newuse --deep --with-bdeps=y @world &&
emerge --depclean && revdep-rebuild
and 
stress --cpu 1 --io 8 -d 8  --vm 4  -t 8h

ver_linux.sh was run on a stable v3.4.4 system.

Linux peach.example.com 3.4.4 #2 SMP Sun Jun 24 20:38:42 EDT 2012 i686
Intel(R) Pentium(R) 4 CPU 2.80GHz GenuineIntel GNU/Linux

 Gnu C  4.5.3
 Gnu make   3.82
 binutils   2.21.1
 util-linux ./ver_linux: line 23: fdformat: command not found
 mount  support
 module-init-tools  found
 quota-tools3.17.
 Linux C Library2.14.1
 Dynamic linker (ldd)   2.14.1
 Procps 3.2.8
 Kbd1.15.3wip
 Sh-utils   8.14
 Modules Loaded ext3 jbd ehci_hcd

 Oops 


Jun 25 21:49:35 peach klogd: [15110.648485] BUG: unable to handle kernel paging 
request at fedc
Jun 25 21:49:35 peach klogd: [15110.648527] IP: [c1234292] 
btrfs_finish_ordered_io+0x22/0x470
Jun 25 21:49:35 peach klogd: [15110.648545] *pde = 018b0067 *pte = 
Jun 25 21:49:35 peach klogd: [15110.648558] Oops:  [#1] SMP
Jun 25 21:49:35 peach klogd: [15110.648571] Modules linked in: ext3 jbd ehci_hcd
Jun 25 21:49:35 peach klogd: [15110.648584]
Jun 25 21:49:35 peach klogd: [15110.648596] Pid: 28814, comm: btrfs-endio-wri 
Not tainted 3.5.0-rc4 #3 HP Pavilion 061 DM170A-ABA A350N/'P4SD-LA'
Jun 25 21:49:35 peach klogd: [15110.648618] EIP: 0060:[c1234292] EFLAGS: 
00010293 CPU: 1
Jun 25 21:49:35 peach klogd: [15110.648633] EIP is at 
btrfs_finish_ordered_io+0x22/0x470
Jun 25 21:49:35 peach klogd: [15110.648646] EAX: fedc EBX: f027ce40 ECX: 
f027ceb4 EDX: 
Jun 25 21:49:35 peach klogd: [15110.648661] ESI: f027cea0 EDI: db42624c EBP: 
db42626c ESP: f057ff08
Jun 25 21:49:35 peach klogd: [15110.648676]  DS: 007b ES: 007b FS: 00d8 GS: 
 SS: 0068
Jun 25 21:49:35 peach klogd: [15110.648690] CR0: 8005003b CR2: fedc CR3: 
0458b000 CR4: 07d0
Jun 25 21:49:35 peach klogd: [15110.648706] DR0:  DR1:  DR2: 
 DR3: 
Jun 25 21:49:35 peach klogd: [15110.648721] DR6: 0ff0 DR7: 0400
Jun 25 21:49:35 peach klogd: [15110.648733] Process btrfs-endio-wri (pid: 
28814, ti=f057e000 task=ca0bf2a0 task.ti=f057e000)
Jun 25 21:49:35 peach klogd: [15110.648750] Stack:
Jun 25 21:49:35 peach klogd: [15110.648764]  f057ff3c 0286 c103a54c 
0286 f057ff3c 00e1fe18 f057ff3c c103a5b1
Jun 25 21:49:35 peach klogd: [15110.648787]  f7078000 c164c7b4  
0001   00200200 00e3d2d8
Jun 25 21:49:35 peach klogd: [15110.648810]  fedc c10399a0 010bf2a0 
 0286 db426240 f027cea0 db42624c
Jun 25 21:49:35 peach klogd: [15110.648826] Call Trace: Jun 25 21:49:35 peach 
klogd: [15110.648841]  [c103a54c] ?  try_to_del_timer_sync+0x5c/0xa0
Jun 25 21:49:35 peach klogd: [15110.648857]  [c103a5b1] ?  
del_timer_sync+0x21/0x40
Jun 25 21:49:35 peach klogd: [15110.648874]  [c164c7b4] ?  
schedule_timeout+0x124/0x220
Jun 25 21:49:35 peach klogd: [15110.648890]  [c10399a0] ? cascade+0x70/0x70
Jun 25 21:49:35 peach klogd: [15110.648904]  [c12641ce] ?  
worker_loop+0x7e/0x440
Jun 25 21:49:35 peach klogd: [15110.648921]  [c1264150] ?  
btrfs_queue_worker+0x2b0/0x2b0
Jun 25 21:49:35 peach klogd: [15110.648937]  [c104984c] ? kthread+0x7c/0x90
Jun 25 21:49:35 peach klogd: [15110.648950]  [c10497d0] ?  
kthread_worker_fn+0x100/0x100
Jun 25 21:49:35 peach klogd: [15110.648966]  [c164f5b6] ?  
kernel_thread_helper+0x6/0xd
Jun 25 21:49:35 peach klogd: [15110.649032] Code: ff ff 8d b4 26 00 00 00 00 55 
57 56 53 89 c3 83 ec 54 8b 40 34 c6 44 24 4b 01 89 44 24 30 8b 54 24 30 2d 24 
01 00 00 89 44 24 40 8b b2 dc fe ff ff 81 ea 04 01 00 00 89 54 24 4c c7 44 24 
50 00
Jun 25 21:49:35 peach klogd: [15110.649087] EIP: [c1234292] 
btrfs_finish_ordered_io+0x22/0x470 SS:ESP 0068:f057ff08
Jun 25 21:49:35 peach klogd: [15110.649121] CR2: fedc
Jun 25 21:49:35 peach klogd: [15110.649169] [drm] nouveau :01:00.0: Setting 
dpms mode 3 on vga encoder (output 0)
Jun 25 21:49:35 peach klogd: [15110.666447] [drm] nouveau :01:00.0: Setting 
dpms mode 0 on vga encoder (output 0)
Jun 25 21:49:35 peach klogd: [15110.666447] [drm] nouveau :01:00.0: Output 
VGA-1 is running on CRTC 0 using output A
Jun 25 21:49:35 peach klogd: [15110.666447] ---[ end trace 676eca54f3265f4d ]---
Jun 25 21:57:58 peach klogd: [15614.172223] [drm] nouveau :01:00.0: Setting 
dpms mode 3 on vga encoder (output 0)
Jun 25 22:51:57 peach klogd: klogd 1.4.1, log source = /proc/kmsg started.
Jun 25 22:51:57 peach klogd: Cannot find map file.

 

--
To unsubscribe from this list: send the line unsubscribe linux-btrfs in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH] Btrfs: change how we indicate we're adding csums

2012-06-26 Thread Josef Bacik
There is weird logic I had to put in place to make sure that when we were
adding csums that we'd used the delalloc block rsv instead of the global
block rsv.  Part of this meant that we had to free up our transaction
reservation before we ran the delayed refs since csum deletion happens
during the delayed ref work.  The problem with this is that when we release
a reservation we will add it to the global reserve if it is not full in
order to keep us going along longer before we have to force a transaction
commit.  By releasing our reservation before we run delayed refs we don't
get the opportunity to drain down the global reserve for the work we did, so
we won't refill it as often.  This isn't a problem per-se, it just results
in us possibly committing transactions more and more often, and in rare
cases could cause those WARN_ON()'s to pop in use_block_rsv because we ran
out of space in our block rsv.

This also helps us by holding onto space while the delayed refs run so we
don't end up with as many people trying to do things at the same time, which
again will help us not force commits or hit the use_block_rsv warnings.
Thanks,

Signed-off-by: Josef Bacik jba...@fusionio.com
---
 fs/btrfs/extent-tree.c |8 +++-
 fs/btrfs/file-item.c   |2 ++
 fs/btrfs/transaction.c |   22 --
 fs/btrfs/transaction.h |1 +
 4 files changed, 18 insertions(+), 15 deletions(-)

diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 570ac47..e552ba5 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -3962,7 +3962,10 @@ static struct btrfs_block_rsv *get_block_rsv(
 {
struct btrfs_block_rsv *block_rsv = NULL;
 
-   if (root-ref_cows || root == root-fs_info-csum_root)
+   if (root-ref_cows)
+   block_rsv = trans-block_rsv;
+
+   if (root == root-fs_info-csum_root  trans-adding_csums)
block_rsv = trans-block_rsv;
 
if (!block_rsv)
@@ -4314,6 +4317,9 @@ static void release_global_block_rsv(struct btrfs_fs_info 
*fs_info)
 void btrfs_trans_release_metadata(struct btrfs_trans_handle *trans,
  struct btrfs_root *root)
 {
+   if (!trans-block_rsv)
+   return;
+
if (!trans-bytes_reserved)
return;
 
diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c
index 5d158d3..863c34d 100644
--- a/fs/btrfs/file-item.c
+++ b/fs/btrfs/file-item.c
@@ -690,6 +690,7 @@ int btrfs_csum_file_blocks(struct btrfs_trans_handle *trans,
return -ENOMEM;
 
sector_sum = sums-sums;
+   trans-adding_csums = 1;
 again:
next_offset = (u64)-1;
found_next = 0;
@@ -853,6 +854,7 @@ next_sector:
goto again;
}
 out:
+   trans-adding_csums = 0;
btrfs_free_path(path);
return ret;
 
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index e5f1b15..021c6e2 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -351,6 +351,7 @@ again:
h-bytes_reserved = 0;
h-delayed_ref_updates = 0;
h-use_count = 1;
+   h-adding_csums = 0;
h-block_rsv = NULL;
h-orig_rsv = NULL;
h-aborted = 0;
@@ -473,7 +474,6 @@ int btrfs_should_end_transaction(struct btrfs_trans_handle 
*trans,
 struct btrfs_root *root)
 {
struct btrfs_transaction *cur_trans = trans-transaction;
-   struct btrfs_block_rsv *rsv = trans-block_rsv;
int updates;
int err;
 
@@ -481,12 +481,6 @@ int btrfs_should_end_transaction(struct btrfs_trans_handle 
*trans,
if (cur_trans-blocked || cur_trans-delayed_refs.flushing)
return 1;
 
-   /*
-* We need to do this in case we're deleting csums so the global block
-* rsv get's used instead of the csum block rsv.
-*/
-   trans-block_rsv = NULL;
-
updates = trans-delayed_ref_updates;
trans-delayed_ref_updates = 0;
if (updates) {
@@ -495,8 +489,6 @@ int btrfs_should_end_transaction(struct btrfs_trans_handle 
*trans,
return err;
}
 
-   trans-block_rsv = rsv;
-
return should_end_transaction(trans, root);
 }
 
@@ -513,8 +505,6 @@ static int __btrfs_end_transaction(struct 
btrfs_trans_handle *trans,
return 0;
}
 
-   btrfs_trans_release_metadata(trans, root);
-   trans-block_rsv = NULL;
while (count  2) {
unsigned long cur = trans-delayed_ref_updates;
trans-delayed_ref_updates = 0;
@@ -527,6 +517,8 @@ static int __btrfs_end_transaction(struct 
btrfs_trans_handle *trans,
}
count++;
}
+   btrfs_trans_release_metadata(trans, root);
+   trans-block_rsv = NULL;
 
if (lock  !atomic_read(root-fs_info-open_ioctl_trans) 
should_end_transaction(trans, root)) {
@@ -1269,9 +1261,6 @@ int btrfs_commit_transaction(struct btrfs_trans_handle 

Re: btrfs deadlock in 3.5-rc3

2012-06-26 Thread Josef Bacik
On Tue, Jun 26, 2012 at 10:47:56AM -0600, Stefan Priebe wrote:
 Am 25.06.2012 22:23, schrieb Josef Bacik:
  On Mon, Jun 25, 2012 at 02:20:31PM -0600, Stefan Priebe wrote:
  Am 25.06.2012 22:11, schrieb Josef Bacik:
  On Mon, Jun 25, 2012 at 01:33:09PM -0600, Stefan Priebe wrote:
  With v3.4 the same. Can't go back more as this really results in very
  fast corruption. Any ideas how to debug?
 
 
  What workload are you running?  I have a ssd here with discard support I 
  can try
  and reproduce on.  Thanks,
 
  i'm using fio with 50 jobs and randwrite of 4k blocks in ceph but i
  don't know which load ceph then exactly generates. ;-(
 
 
  Thats fine, I have this handy create a local ceph cluster script from an
  earlier problem, just send me your fio job and I'll run it locally.  Thanks,
 
 Where you able to find anything? Can i do more or different testing?
 

I can't reproduce so I'm going to have to figure out a way to debug it through
you, as soon as I think of something I will let you know.  Thanks,

Josef
--
To unsubscribe from this list: send the line unsubscribe linux-btrfs in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: btrfs deadlock in 3.5-rc3

2012-06-26 Thread Stefan Priebe

Am 26.06.2012 22:14, schrieb Josef Bacik:

I can't reproduce so I'm going to have to figure out a way to debug it through
you, as soon as I think of something I will let you know.  Thanks,



Thanks. You mentioned that discard shouldn't have any positive effects 
on a SSD.


Might I be seeing a side effect? I mean, with discard I get 13.000 IOPs in ceph;
without discard just 6000-9000 IOPs. Could this be real, or might this just
happen due to the bug I see?


Stefan
--
To unsubscribe from this list: send the line unsubscribe linux-btrfs in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: btrfs oops with kernel 3.5-rc4

2012-06-26 Thread Josef Bacik
On Tue, Jun 26, 2012 at 02:09:19PM -0600, Nathan A. Mourey II wrote:
 Oops with kernel v3.5-rc4.  This error produced while:
 
 emerge --sync   emerge --update --newuse --deep --with-bdeps=y @world  
 emerge --depclean  revdep-rebuild
 and 
 stress --cpu 1 --io 8 -d 8  --vm 4  -t 8h
 
 ver_linux.sh was ran on stable v3.4.4 system.  
 
 Linux peach.example.com 3.4.4 #2 SMP Sun Jun 24 20:38:42 EDT 2012 i686
 Intel(R) Pentium(R) 4 CPU 2.80GHz GenuineIntel GNU/Linux
 
  Gnu C  4.5.3
  Gnu make   3.82
  binutils   2.21.1
  util-linux ./ver_linux: line 23: fdformat: command not found
  mount  support
  module-init-tools  found
  quota-tools3.17.
  Linux C Library2.14.1
  Dynamic linker (ldd)   2.14.1
  Procps 3.2.8
  Kbd1.15.3wip
  Sh-utils   8.14
  Modules Loaded ext3 jbd ehci_hcd
 

Can you gdb btrfs.ko and do 

list *(btrfs_finish_ordered_io+0x22)

please?  And are you running compression by chance?  Thanks,

Josef
--
To unsubscribe from this list: send the line unsubscribe linux-btrfs in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: btrfs deadlock in 3.5-rc3

2012-06-26 Thread Josef Bacik
On Tue, Jun 26, 2012 at 02:19:17PM -0600, Stefan Priebe wrote:
 Am 26.06.2012 22:14, schrieb Josef Bacik:
  I can't reproduce so I'm going to have to figure out a way to debug it 
  through
  you, as soon as I think of something I will let you know.  Thanks,
 
 
 Thanks. You mentioned that discard shouldn't have any positive effects 
 on a SSD.
 
 May i see a sideffect? I mean with discard 13.000 IOPs in ceph without 
 discard just 6000-9000 IOPs could this be real or might this just happen 
 due to the bug i see?
 

Beats me, it would seem to me that discard would make things slower since we
have to wait for the commit to discard everybody, but who knows, stranger things
have happened.  Can you reproduce 2 more times and get sysrq+w each time so I
have a few different outputs to compare, maybe I'm missing something.  Thanks,

Josef
--
To unsubscribe from this list: send the line unsubscribe linux-btrfs in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: System Policy for Filenames

2012-06-26 Thread Fajar A. Nugraha
On Wed, Jun 27, 2012 at 1:28 AM, Aaron Peterson
myusualnickn...@gmail.com wrote:
 Billy,

 Thank you! I will look into FUSE.

 Ultimately, I want my / to be mounted with these rules,  I will need a
 boot loader to be able to handle it.

Try looking at how the Ubuntu live CD works. Last time I checked, it could use
unionfs-fuse as / to make the read-only CD media appear as a writable
live session. Something similar should be applicable to your needs.

  I am wondering if filesystem software has hooks for AppArmor or
 SELinux, or some other Linux Security Module would be appropriated to
 add to filesystem code?

Not that I know of.

-- 
Fajar
--
To unsubscribe from this list: send the line unsubscribe linux-btrfs in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: Long btrfs hangs during suspend to RAM / BTRFS warning (device dm-0): Aborting unused transaction

2012-06-26 Thread Marc MERLIN
On Tue, Jun 26, 2012 at 12:36:37PM -0700, Marc MERLIN wrote:
 I was fine with btrfs until 3.2.16, but when going to 3.4 or 3.4.4, I'm
 having my system randomly not wanting to suspend to RAM.
 
 My suspend light started flashing but the system wasn't suspending.
 I was able to unlock X, run top, saw that btrfs-delalloc-??? was taking
 98% CPU, and any command that touched disk hung 
 (so I could log in, but would not get a shell).
 
 I did the sysrq commands, which took a long time to work (they stacked
 up with nothing happening and maybe 10mn later, the system unhung itself
 and they ran, maybe too late, not sure).
 
 Interestingly the system did go to sleep and resumed ok after that, it
 just took 20mn to get there.
 
 It's all pasted below.
 
 gandalfthegreat:~# btrfs fi show
 Label: 'btrfs_pool1'  uuid: 873d526c-e911-4234-af1b-239889cd143d
   Total devices 1 FS bytes used 214.44GB
   devid1 size 231.02GB used 231.02GB path /dev/dm-0
 
 Btrfs Btrfs v0.19
 gandalfthegreat:~# 

Now, I'm also seeing these below and I have this again (86% CPU):
 6076 root  20   0 000 R   86  0.0  29:40.11 btrfs-delalloc-

How bad is it, doctor?  I think I'll be going back to 3.2.16 for now though.

[100415.369301] BTRFS warning (device dm-0): Aborting unused transaction.
[100415.739337] BTRFS warning (device dm-0): Aborting unused transaction.
[100416.093839] BTRFS warning (device dm-0): Aborting unused transaction.
[100416.433245] BTRFS warning (device dm-0): Aborting unused transaction.
[100709.352445] usb 1-5: USB disconnect, device number 8
[100710.197602] thinkpad_acpi: EC reports that Thermal Table has changed
[100710.199954] ACPI: \_SB_.GDCK - undocking
[100711.358679] wlan0: deauthenticating from 00:24:6c:67:03:d1 by local choice 
(reason=3)
[100713.044214] e1000e :00:19.0: irq 46 for MSI/MSI-X
[101447.801190] btrfs no csum found for inode 3910588 start 5144576
[101447.802277] btrfs csum failed ino 3910588 off 5144576 csum 2096800889 
private 0
[101447.957161] btrfs no csum found for inode 3910588 start 5996544
[101447.957990] btrfs csum failed ino 3910588 off 5996544 csum 2096800889 
private 0
[101447.960459] btrfs no csum found for inode 3910588 start 6062080
[101447.961351] btrfs csum failed ino 3910588 off 6062080 csum 2096800889 
private 0
[101447.962459] btrfs no csum found for inode 3910588 start 6225920
[101447.963543] btrfs csum failed ino 3910588 off 6225920 csum 2096800889 
private 0
[101447.964369] btrfs no csum found for inode 3910588 start 6291456
[101447.965045] btrfs csum failed ino 3910588 off 6291456 csum 2096800889 
private 0
[101447.966406] btrfs no csum found for inode 3910588 start 6356992
[101447.967392] btrfs csum failed ino 3910588 off 6356992 csum 2123070294 
private 0
[101448.088912] btrfs no csum found for inode 3910588 start 6553600
[101448.088971] btrfs no csum found for inode 3910588 start 6586368
[101448.090058] btrfs csum failed ino 3910588 off 6586368 csum 2096800889 
private 0
[101448.090305] btrfs csum failed ino 3910588 off 6553600 csum 4253301504 
private 0
[101448.093231] btrfs no csum found for inode 3910588 start 6717440
[101448.094269] btrfs csum failed ino 3910588 off 6717440 csum 2096800889 
private 0
[101448.096104] btrfs no csum found for inode 3910588 start 6848512
[101448.097002] btrfs csum failed ino 3910588 off 6848512 csum 3282939717 
private 0
[101448.098249] btrfs no csum found for inode 3910588 start 6946816
[101448.101561] btrfs no csum found for inode 3910588 start 7045120
[101448.106352] btrfs no csum found for inode 3910588 start 7176192
[101448.108224] btrfs no csum found for inode 3910588 start 7241728
[101448.33] btrfs no csum found for inode 3910588 start 7307264
[101448.127081] btrfs no csum found for inode 3910588 start 7569408
[101448.144351] btrfs no csum found for inode 3910588 start 7864320
[101448.155857] btrfs no csum found for inode 3910588 start 8060928
[101448.165868] btrfs no csum found for inode 3910588 start 8257536
[101448.175275] btrfs no csum found for inode 3910588 start 8454144
[101448.176087] btrfs no csum found for inode 3910588 start 8552448
[101448.180927] btrfs no csum found for inode 3910588 start 8847360
[101448.183489] btrfs no csum found for inode 3910588 start 8978432
[101448.192909] btrfs no csum found for inode 3910588 start 9371648
[101448.198959] btrfs no csum found for inode 3910588 start 9568256
[101448.413716] btrfs no csum found for inode 3910588 start 5439488
[101448.413996] btrfs no csum found for inode 3910588 start 5537792
[101448.414379] btrfs no csum found for inode 3910588 start 5603328
[101448.416386] btrfs no csum found for inode 3910588 start 5734400
[101448.424108] btrfs no csum found for inode 3910588 start 5799936
[101448.721649] btrfs no csum found for inode 3910588 start 9699328
[101448.737377] btrfs no csum found for inode 3910588 start 9863168
[101448.743929] btrfs no csum found for inode 3910588 start 10027008
[101448.753186] btrfs no csum found for inode 

waiting for deferred cleanup operations

2012-06-26 Thread David Nicol
I've noticed that the patches I posted here two years ago about an
ioctl to allow userspace to wait for deferred ops to complete aren't
included in the "has all patches posted to mailing list" git repo. Is
this an oversight, or is there a problem with the proposed architecture
or implementation?

The proposed architecture is, there is an IOCTL to wait for deferred
activity to complete, and it takes a flag field listing possible
deferred activities that are being waited for. Only one was defined in
the submitted patch, to wait for completion of removal of dead trees,
with the hope that future deferred things would register a bit with
the ...

The whole picture is in the introduction to the last submitted version
of the patch,
http://permalink.gmane.org/gmane.comp.file-systems.btrfs/8668

Is the patch as seen there good, and were I to update it to apply
against current source, would there be any problem with tracking it
for mainstream inclusion?


David Nicol

-- 
Run it up the flagpole and see who salutes it
--
To unsubscribe from this list: send the line unsubscribe linux-btrfs in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: waiting for deferred cleanup operations

2012-06-26 Thread David Nicol
I see that the #undef spells the macro wrong, so I'll fix that of course too.

On Tue, Jun 26, 2012 at 8:55 PM, David Nicol davidni...@gmail.com wrote:


 The whole picture is in the introduction to the last submitted version
 of the patch,
 http://permalink.gmane.org/gmane.comp.file-systems.btrfs/8668
--
To unsubscribe from this list: send the line unsubscribe linux-btrfs in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 0/5] introduce btrfs filesystem property command

2012-06-26 Thread Liu Bo
On 06/25/2012 05:20 AM, Alexander Block wrote:

 This patchset introduces the btrfs filesystem property command. It is the
 result of a discussion we had on IRC. I tried to make the properties
 interface as generic and extensible as possible. Comments are welcome.
 
 Currently the command looks like this:
 btrfs fi prop /path/to/object [name[=value]]
 
 Some people may prefer other forms. For example I got suggestions for
 these forms:
 btrfs set/get /path/to/object [name [value]]
 btrfs prop /path/to/object [name[=value]] (and also without the =)
 
 I'm open to more suggestions and a discussion on this. I'm definitely
 for removing the fi[lesystem] prefix but I'm neutral to the other 
 suggestions made so far.
 
 For now, I've implemented three properties:
 1. read-only. Usable on subvolumes to toggle the read-only flags.
 2. label. I looked through btrfs to find good examples of things that
could be moved to the new properties interface and the filesystem
label looked like a good one. There are for sure more, but that is
something for later (and maybe for someone else). I would suggest
moving everything that makes sense over to the props interface and
marking the old interfaces as deprecated. Comments on this are welcome.
 


Hi Alex,

Thanks for doing these!

I'm doing something similar to yours, but I prefer keeping these prefixes and
making some efforts to enhance the original APIs:

For subvolumes, right now we can have two attributes, readonly and default, and
we can let 'btrfs sub list' work just like 'ls' so that we can get their
attributes more easily:

o   btrfs subvolume list [-p] path
subvol (Default)
snap (Readonly)

o   btrfs subvolume list [-p] path/subvol
subvol (Default)

o   btrfs subvolume list [-p] path/snap
snap (Readonly)


how about this?

thanks,
liubo

 Alex.
 
 Alexander Block (5):
   Btrfs-progs: add BTRFS_IOC_SUBVOL_GET/SETFLAGS to ioctl.h
   Btrfs-progs: move skip_prefix and prefixcmp to utils.c
   Btrfs-progs: let get_label return the label instead of of printing it
   Btrfs-progs: make filesystem_cmd_group non const
   Btrfs-progs: introduce btrfs filesystem property command
 
  Makefile  |3 +-
  btrfs.c   |   19 +--
  btrfslabel.c  |   13 +-
  btrfslabel.h  |4 +-
  cmds-filesystem.c |  115 +-
  commands.h|9 +-
  help.c|2 +
  ioctl.h   |2 +
  props.c   |  460 
 +
  props.h   |   45 ++
  utils.c   |   15 ++
  utils.h   |3 +
  12 files changed, 659 insertions(+), 31 deletions(-)
  create mode 100644 props.c
  create mode 100644 props.h
 


--
To unsubscribe from this list: send the line unsubscribe linux-btrfs in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


WARNING: at fs/btrfs/free-space-cache.c:1887 after hard shutdown.

2012-06-26 Thread Jordan Windsor
Hello,
My computer locked up and I had to press the reset button.
Ever since then I can't mount the btrfs filesystem, here's the output:

[   37.645583] [ cut here ]
[   37.645598] WARNING: at fs/btrfs/free-space-cache.c:1887
btrfs_remove_free_space+0x329/0x350 [btrfs]()
[   37.645600] Modules linked in: btrfs zlib_deflate libcrc32c ext4
jbd2 mbcache crc16 snd_hda_codec_realtek rc_dib0700_rc5 mt2266
dvb_usb_dib0700 dib3000mc dib8000 dib0070 dib7000m dib7000p
dibx000_common dib0090 dvb_usb dvb_core rc_core microcode joydev
usb_storage uas coretemp acpi_cpufreq mperf ppdev nvidia(PO) 8139too
8139cp mii iTCO_wdt i2c_i801 i2c_core iTCO_vendor_support serio_raw
i7core_edac edac_core evdev pcspkr processor parport_pc parport
snd_hda_intel snd_hda_codec snd_hwdep snd_pcm snd_page_alloc snd_timer
snd soundcore button reiserfs aes_x86_64 cryptd aes_generic xts
gf128mul dm_crypt dm_mod usbhid hid sr_mod sd_mod cdrom pata_jmicron
pata_acpi uhci_hcd ata_piix ata_generic ahci libahci libata ehci_hcd
crc32c_intel usbcore scsi_mod usb_common floppy
[   37.645651] Pid: 304, comm: mount Tainted: P   O 3.4.4-2-ck #1
[   37.645653] Call Trace:
[   37.645659]  [810515cf] warn_slowpath_common+0x7f/0xc0
[   37.645662]  [8105162a] warn_slowpath_null+0x1a/0x20
[   37.645670]  [a0f4a159] btrfs_remove_free_space+0x329/0x350 [btrfs]
[   37.645674]  [81073bb0] ? abort_exclusive_wait+0xb0/0xb0
[   37.645682]  [a0efaa6f]
btrfs_alloc_logged_file_extent+0x1bf/0x1e0 [btrfs]
[   37.645688]  [a0ee7bfa] ? btrfs_free_path+0x2a/0x40 [btrfs]
[   37.645697]  [a0f44c5e] replay_one_extent+0x5be/0x620 [btrfs]
[   37.645701]  [81179f55] ? iput+0x105/0x210
[   37.645710]  [a0f4549b] replay_one_buffer+0x27b/0x350 [btrfs]
[   37.645719]  [a0f302bd] ? alloc_extent_buffer+0x9d/0x5c0 [btrfs]
[   37.645728]  [a0f42d92] walk_down_log_tree+0x202/0x3c0 [btrfs]
[   37.645736]  [a0f42fed] walk_log_tree+0x9d/0x1f0 [btrfs]
[   37.645745]  [a0f46f4b] btrfs_recover_log_trees+0x21b/0x3a0 [btrfs]
[   37.645754]  [a0f45220] ? replay_one_dir_item+0xe0/0xe0 [btrfs]
[   37.645763]  [a0f0883d] open_ctree+0x14ed/0x1ac0 [btrfs]
[   37.645767]  [8121e101] ? disk_name+0x61/0xc0
[   37.645773]  [a0ee5836] btrfs_mount+0x5b6/0x6a0 [btrfs]
[   37.645776]  [8113f400] ? alloc_pages_current+0xb0/0x120
[   37.645780]  [81163533] mount_fs+0x43/0x1b0
[   37.645783]  [8117d740] vfs_kern_mount+0x70/0x100
[   37.645786]  [8117dc64] do_kern_mount+0x54/0x110
[   37.645788]  [8117f55a] do_mount+0x26a/0x850
[   37.645791]  [81101e1e] ? __get_free_pages+0xe/0x50
[   37.645794]  [8117f15a] ? copy_mount_options+0x3a/0x180
[   37.645797]  [8117fc7d] sys_mount+0x8d/0xe0
[   37.645801]  [814626e9] system_call_fastpath+0x16/0x1b
[   37.645802] ---[ end trace 98bf42a1cdef3f88 ]---
[   37.645812] [ cut here ]
[   37.646716] kernel BUG at fs/btrfs/extent-tree.c:6107!
[   37.647722] invalid opcode:  [#1] PREEMPT SMP
[   37.648763] CPU 1
[   37.649156] Modules linked in: btrfs zlib_deflate libcrc32c ext4
jbd2 mbcache crc16 snd_hda_codec_realtek rc_dib0700_rc5 mt2266
dvb_usb_dib0700 dib3000mc dib8000 dib0070 dib7000m dib7000p
dibx000_common dib0090 dvb_usb dvb_core rc_core microcode joydev
usb_storage uas coretemp acpi_cpufreq mperf ppdev nvidia(PO) 8139too
8139cp mii iTCO_wdt i2c_i801 i2c_core iTCO_vendor_support serio_raw
i7core_edac edac_core evdev pcspkr processor parport_pc parport
snd_hda_intel snd_hda_codec snd_hwdep snd_pcm snd_page_alloc snd_timer
snd soundcore button reiserfs aes_x86_64 cryptd aes_generic xts
gf128mul dm_crypt dm_mod usbhid hid sr_mod sd_mod cdrom pata_jmicron
pata_acpi uhci_hcd ata_piix ata_generic ahci libahci libata ehci_hcd
crc32c_intel usbcore scsi_mod usb_common floppy
[   37.665300]
[   37.665591] Pid: 304, comm: mount Tainted: PW  O 3.4.4-2-ck #1
[   37.666973] RIP: 0010:[a0efaa77]  [a0efaa77]
btrfs_alloc_logged_file_extent+0x1c7/0x1e0 [btrfs]
[   37.669058] RSP: 0018:880118649748  EFLAGS: 00010282
[   37.670098] RAX: ffea RBX: 88010ff1a000 RCX: 000e
[   37.671495] RDX:  RSI: 0046 RDI: 88010fef6840
[   37.672892] RBP: 8801186497c8 R08:  R09: 
[   37.674289] R10:  R11:  R12: 880118649883
[   37.675687] R13:  R14: 0019cf4be000 R15: 88010fc95800
[   37.731396] FS:  7fb10edfb740() GS:88011fc4()
knlGS:
[   37.788242] CS:  0010 DS:  ES:  CR0: 8005003b
[   37.788243] CR2: 7f5b18c9c15c CR3: 000110a7d000 CR4: 07e0
[   37.788245] DR0:  DR1:  DR2: 
[   37.788247] DR3:  DR6: 0ff0 DR7: 

Re: btrfs volume suddenly becomes read-only

2012-06-26 Thread Chester
The problem still occurs, with just a bittorrent client (downloading) +
chrome running.

This could also be related: last night, I triggered a btrfs
balance. I'm not too sure whether the balance finished or not,
because when I took a look at it this morning, it said "no file or
directory" and told me to check dmesg for messages. I did check, and I
saw the usual bunch of "moving block group" and "extents found"
messages that come from balances.
--
To unsubscribe from this list: send the line unsubscribe linux-btrfs in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: Long btrfs hangs during suspend to RAM / BTRFS warning (device dm-0): Aborting unused transaction

2012-06-26 Thread Marc MERLIN
On Tue, Jun 26, 2012 at 06:38:18PM -0700, Marc MERLIN wrote:
 Now, I'm also seeing these below and I have this again (86% CPU):
  6076 root  20   0 000 R   86  0.0  29:40.11 btrfs-delalloc-  
   
 
 How bad is it, doctor?  I think I'll be going back to 3.2.16 for now though.

Back to 3.2.16, I'm now seeing this:
[  840.516733] INFO: task VirtualBox:6818 blocked for more than 120 seconds.
[  840.516735] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this 
message.
[  840.516736] VirtualBox  D 8801fd134080 0  6818   6758 0x0080
[  840.516740]  8801fd134080 0086 0050 
880202e7f100
[  840.516744]  00013580 8801c6f0dfd8 8801c6f0dfd8 
8801fd134080
[  840.516748]  8801c6f0da68 8801c6f0da68 88020a4e22f0 
88023bc13e08
[  840.516752] Call Trace:
[  840.516755]  [810b5c67] ? __lock_page+0x66/0x66
[  840.516758]  [8134aea4] ? io_schedule+0x58/0x6f
[  840.516761]  [810b5c6d] ? sleep_on_page+0x6/0xa
[  840.516764]  [8134b1e5] ? __wait_on_bit_lock+0x3c/0x85
[  840.516767]  [810b5c62] ? __lock_page+0x61/0x66
[  840.516770]  [81060051] ? autoremove_wake_function+0x2a/0x2a
[  840.516785]  [a01838d7] ? 
extent_write_cache_pages.isra.13.constprop.22+0xf6/0x278 [btrfs]
[  840.516789]  [810ec9cb] ? __cache_free.isra.40+0x19/0x1a7
[  840.516792]  [8134ed52] ? sub_preempt_count+0x83/0x94
[  840.516795]  [8134c2dd] ? _raw_spin_unlock+0x24/0x30
[  840.516811]  [a0183c4b] ? extent_writepages+0x40/0x57 [btrfs]
[  840.516826]  [a0177f5f] ? __btrfs_buffered_write+0x2bb/0x2dc 
[btrfs]
[  840.516841]  [a016e88a] ? uncompress_inline.isra.44+0x116/0x116 
[btrfs]
[  840.516844]  [810b6aaf] ? __filemap_fdatawrite_range+0x4b/0x50
[  840.516847]  [810b6ad9] ? filemap_write_and_wait_range+0x25/0x4d
[  840.516863]  [a01782ce] ? btrfs_file_aio_write+0x34e/0x490 [btrfs]
[  840.516866]  [8103e092] ? get_parent_ip+0x9/0x1b
[  840.516882]  [a0177f80] ? __btrfs_buffered_write+0x2dc/0x2dc 
[btrfs]
[  840.516886]  [8112f19c] ? aio_rw_vect_retry+0x70/0x18e
[  840.516888]  [8112f12c] ? aio_fsync+0x22/0x22
[  840.516891]  [8112fbc7] ? aio_run_iocb+0x72/0x11c
[  840.516894]  [81130d9a] ? do_io_submit+0x6a4/0x7f9
[  840.516898]  [813508d2] ? system_call_fastpath+0x16/0x1b
[ 1187.553635] btrfs: unlinked 8 orphans
[ 3810.200064] e1000e :00:19.0: BAR 0: set to [mem 0xfc00-0xfc01] 
(PCI address [0xfc00-0xfc01])
[ 3810.200071] e1000e :00:19.0: BAR 1: set to [mem 0xfc025000-0xfc025fff] 
(PCI address [0xfc025000-0xfc025fff])
[ 3810.200076] e1000e :00:19.0: BAR 2: set to [io  0x1840-0x185f] (PCI 
address [0x1840-0x185f])
[ 3810.200093] e1000e :00:19.0: restoring config space at offset 0xf (was 
0x100, writing 0x10b)
[ 3810.200115] e1000e :00:19.0: restoring config space at offset 0x1 (was 
0x10, writing 0x100107)
[ 3810.200147] e1000e :00:19.0: PME# disabled
[ 3810.200224] e1000e :00:19.0: irq 45 for MSI/MSI-X
[ 4671.144685] iwlwifi :03:00.0: Tx aggregation enabled on ra = 
2c:b0:5d:3c:7d:f1 tid = 1
[ 4799.384107] btrfs: unlinked 8 orphans
[ 8436.512513] btrfs: unlinked 7 orphans
[11350.749850] btrfs no csum found for inode 3909426 start 0
[11350.750697] btrfs csum failed ino 3909426 off 0 csum 1419704114 private 0
[11652.088805] btrfs no csum found for inode 3910848 start 0
[11652.089524] btrfs csum failed ino 3910848 off 0 csum 3145117582 private 0

My firefox and chrome profiles were corrupted, so I had to restore them from an 
old snapshot.

I can't prove it, but it looks like my corruption happened right at the same
time as I rebooted to 3.4.4.

Marc
-- 
A mouse is a device used to point at the xterm you want to type in - A.S.R.
Microsoft is to operating systems 
   what McDonalds is to gourmet cooking
Home page: http://marc.merlins.org/  
--
To unsubscribe from this list: send the line unsubscribe linux-btrfs in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: btrfs deadlock in 3.5-rc3

2012-06-26 Thread Stefan Priebe - Profihost AG
Yes, I will do so. Right now I was trying to compare discard with
non-discard using this simple command:
for i in `seq 0 1 1000`; do dd if=/dev/zero of=t_$i bs=4M count=1; rm 
t_$i; done;


But I hit a new bug:

[39577.660228] BUG: unable to handle kernel paging request at 
fe50

[39577.686517] IP: [8131b4f3] btrfs_finish_ordered_io+0x23/0x3f0
[39577.713417] PGD 1c0d067 PUD 1c0e067 PMD 0
[39577.740039] Oops:  [#1] SMP
[39577.766401] CPU 6
[39577.792540] Modules linked in: nf_conntrack_ipv4 nf_conntrack 
nf_defrag_ipv4 ipv6 i2c_i801 coretemp i2c_core ixgbe(O) [last unloaded: 
scsi_wait_scan]

[39577.847511]
[39577.847513] Pid: 3447, comm: btrfs-endio-wri Tainted: G   O 
3.5.0-rc4intel #15 Supermicro 
X9SRE/X9SRE-3F/X9SRi/X9SRi-3F/X9SRE/X9SRE-3F/X9SRi/X9SRi-3F
[39577.847516] RIP: 0010:[8131b4f3]  [8131b4f3] 
btrfs_finish_ordered_io+0x23/0x3f0

[39577.847516] RSP: 0018:880e3b861d90  EFLAGS: 00010287
[39577.847517] RAX: 880e3b861e90 RBX: 880e3a8fb100 RCX: 
880e3b861e90
[39577.847517] RDX: 880e3b861e90 RSI: 880e3a8fb190 RDI: 
880e3a8fb100
[39577.847518] RBP: 880e3b861e10 R08: dead00100100 R09: 
dead00200200
[39577.847518] R10:  R11: 0001 R12: 
880e3a624770
[39577.847518] R13:  R14: 880e3a8fb1b8 R15: 
880e3b861e80
[39577.847519] FS:  () GS:880e7fd8() 
knlGS:

[39577.847520] CS:  0010 DS:  ES:  CR0: 8005003b
[39577.847520] CR2: fe50 CR3: 01c0b000 CR4: 
000407e0
[39577.847521] DR0:  DR1:  DR2: 

[39577.847521] DR3:  DR6: 0ff0 DR7: 
0400
[39577.847522] Process btrfs-endio-wri (pid: 3447, threadinfo 
880e3b86, task 880e40e58000)

[39577.847522] Stack:
[39577.847524]   dead00200200 000100965b86 
880e40e94000
[39577.847525]  8104dc20 880e40e58000  

[39577.847526]    880e40e58000 
880e3a624720

[39577.847527] Call Trace:
[39577.847530]  [8104dc20] ? lock_timer_base+0x70/0x70
[39577.847531]  [8131b8d0] finish_ordered_fn+0x10/0x20
[39577.847533]  [8133f38e] worker_loop+0x14e/0x530
[39577.847534]  [8133f240] ? btrfs_queue_worker+0x310/0x310
[39577.847535]  [8133f240] ? btrfs_queue_worker+0x310/0x310
[39577.847538]  [8105ffd6] kthread+0x96/0xa0
[39577.847541]  [816dc594] kernel_thread_helper+0x4/0x10
[39577.847543]  [8105ff40] ? kthread_worker_fn+0x130/0x130
[39577.847544]  [816dc590] ? gs_change+0xb/0xb
[39577.847555] Code: 0f 1f 84 00 00 00 00 00 55 48 89 e5 48 83 c4 80 48 
89 5d d8 4c 89 65 e0 4c 89 6d e8 4c 89 75 f0 4c 89 7d f8 48 89 fb 4c 8b 
6f 38 4d 8b a5 50 fe ff ff 4d 8d 95 50 fe ff ff 48 c7 45 c8 00 00 00

[39577.847556] RIP  [8131b4f3] btrfs_finish_ordered_io+0x23/0x3f0
[39577.847557]  RSP 880e3b861d90
[39577.847557] CR2: fe50
[39577.847558] ---[ end trace 27bdc0b318ad6463 ]---

Am 26.06.2012 22:48, schrieb Josef Bacik:

On Tue, Jun 26, 2012 at 02:19:17PM -0600, Stefan Priebe wrote:

Am 26.06.2012 22:14, schrieb Josef Bacik:

I can't reproduce so I'm going to have to figure out a way to debug it through
you, as soon as I think of something I will let you know.  Thanks,



Thanks. You mentioned that discard shouldn't have any positive effects
on an SSD.

Could I be seeing a side effect? I mean, with discard I get 13.000 IOPs in
ceph, and without discard just 6000-9000 IOPs. Could this be real, or might
this just happen due to the bug I see?



Beats me, it would seem to me that discard would make things slower since we
have to wait for the commit to discard everybody, but who knows, stranger things
have happened.  Can you reproduce 2 more times and get sysrq+w each time so I
have a few different outputs to compare, maybe I'm missing something.  Thanks,

Josef



--
To unsubscribe from this list: send the line unsubscribe linux-btrfs in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html