Re: kernel BUG at /linux/fs/btrfs/extent-tree.c:1833!

2015-10-11 Thread Peter Becker
dmesg:

[ 3000.613310] INFO: task btrfs-transacti:11423 blocked for more than
120 seconds.
[ 3000.613314]   Tainted: G U  4.2.3-040203-generic
#201510030832
[ 3000.613314] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs"
disables this message.
[ 3000.613315] btrfs-transacti D  0 11423  2 0x
[ 3000.613318]  88007bd2bb58 0046 880215d7e600
8802121d2640
[ 3000.613319]  0246 88007bd2c000 8801aa607928
8801aa607940
[ 3000.613321]  88007bd2bba8 8801aa607920 88007bd2bb78
817a6357
[ 3000.613322] Call Trace:
[ 3000.613327]  [] schedule+0x37/0x80
[ 3000.613341]  [] btrfs_tree_lock+0x98/0x1c0 [btrfs]
[ 3000.613344]  [] ? prepare_to_wait_event+0xf0/0xf0
[ 3000.613349]  [] btrfs_search_slot+0x6e5/0x9c0 [btrfs]
[ 3000.613351]  [] ? __set_page_dirty_nobuffers+0xe7/0x140
[ 3000.613356]  [] btrfs_lookup_inode+0x2f/0xa0 [btrfs]
[ 3000.613364]  [] ?
btrfs_delete_delayed_items+0x313/0x330 [btrfs]
[ 3000.613370]  [] ?
btrfs_insert_delayed_items+0xa4/0x420 [btrfs]
[ 3000.613376]  []
__btrfs_update_delayed_inode+0x5a/0x1e0 [btrfs]
[ 3000.613382]  []
__btrfs_run_delayed_items+0x17c/0x210 [btrfs]
[ 3000.613388]  [] btrfs_run_delayed_items+0x13/0x20 [btrfs]
[ 3000.613395]  []
btrfs_commit_transaction+0x2c8/0xb10 [btrfs]
[ 3000.613400]  [] ? start_transaction+0x93/0x580 [btrfs]
[ 3000.613406]  [] transaction_kthread+0x1ba/0x240 [btrfs]
[ 3000.613411]  [] ?
btrfs_cleanup_transaction+0x540/0x540 [btrfs]
[ 3000.613412]  [] kthread+0xc9/0xe0
[ 3000.613414]  [] ? kthread_create_on_node+0x180/0x180
[ 3000.613415]  [] ret_from_fork+0x3f/0x70
[ 3000.613416]  [] ? kthread_create_on_node+0x180/0x180
[ 3120.613665] INFO: task btrfs-transacti:11423 blocked for more than
120 seconds.
[ 3120.613669]   Tainted: G U  4.2.3-040203-generic
#201510030832
[ 3120.613669] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs"
disables this message.
[ 3120.613670] btrfs-transacti D  0 11423  2 0x
[ 3120.613672]  88007bd2bb58 0046 880215d7e600
8802121d2640
[ 3120.613674]  0246 88007bd2c000 8801aa607928
8801aa607940
[ 3120.613675]  88007bd2bba8 8801aa607920 88007bd2bb78
817a6357
[ 3120.613676] Call Trace:
[ 3120.613682]  [] schedule+0x37/0x80
[ 3120.613695]  [] btrfs_tree_lock+0x98/0x1c0 [btrfs]
[ 3120.613698]  [] ? prepare_to_wait_event+0xf0/0xf0
[ 3120.613702]  [] btrfs_search_slot+0x6e5/0x9c0 [btrfs]
[ 3120.613704]  [] ? __set_page_dirty_nobuffers+0xe7/0x140
[ 3120.613709]  [] btrfs_lookup_inode+0x2f/0xa0 [btrfs]
[ 3120.613716]  [] ?
btrfs_delete_delayed_items+0x313/0x330 [btrfs]
[ 3120.613721]  [] ?
btrfs_insert_delayed_items+0xa4/0x420 [btrfs]
[ 3120.613727]  []
__btrfs_update_delayed_inode+0x5a/0x1e0 [btrfs]
[ 3120.613732]  []
__btrfs_run_delayed_items+0x17c/0x210 [btrfs]
[ 3120.613744]  [] btrfs_run_delayed_items+0x13/0x20 [btrfs]
[ 3120.613749]  []
btrfs_commit_transaction+0x2c8/0xb10 [btrfs]
[ 3120.613754]  [] ? start_transaction+0x93/0x580 [btrfs]
[ 3120.613759]  [] transaction_kthread+0x1ba/0x240 [btrfs]
[ 3120.613764]  [] ?
btrfs_cleanup_transaction+0x540/0x540 [btrfs]
[ 3120.613766]  [] kthread+0xc9/0xe0
[ 3120.613767]  [] ? kthread_create_on_node+0x180/0x180
[ 3120.613768]  [] ret_from_fork+0x3f/0x70
[ 3120.613769]  [] ? kthread_create_on_node+0x180/0x180
[ 3240.614145] INFO: task btrfs-transacti:11423 blocked for more than
120 seconds.
[ 3240.614148]   Tainted: G U  4.2.3-040203-generic
#201510030832
[ 3240.614149] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs"
disables this message.
[ 3240.614150] btrfs-transacti D  0 11423  2 0x
[ 3240.614152]  88007bd2bb58 0046 880215d7e600
8802121d2640
[ 3240.614154]  0246 88007bd2c000 8801aa607928
8801aa607940
[ 3240.614155]  88007bd2bba8 8801aa607920 88007bd2bb78
817a6357
[ 3240.614156] Call Trace:
[ 3240.614161]  [] schedule+0x37/0x80
[ 3240.614174]  [] btrfs_tree_lock+0x98/0x1c0 [btrfs]
[ 3240.614177]  [] ? prepare_to_wait_event+0xf0/0xf0
[ 3240.614181]  [] btrfs_search_slot+0x6e5/0x9c0 [btrfs]
[ 3240.614183]  [] ? __set_page_dirty_nobuffers+0xe7/0x140
[ 3240.614188]  [] btrfs_lookup_inode+0x2f/0xa0 [btrfs]
[ 3240.614194]  [] ?
btrfs_delete_delayed_items+0x313/0x330 [btrfs]
[ 3240.614200]  [] ?
btrfs_insert_delayed_items+0xa4/0x420 [btrfs]
[ 3240.614206]  []
__btrfs_update_delayed_inode+0x5a/0x1e0 [btrfs]
[ 3240.614211]  []
__btrfs_run_delayed_items+0x17c/0x210 [btrfs]
[ 3240.614217]  [] btrfs_run_delayed_items+0x13/0x20 [btrfs]
[ 3240.614222]  []
btrfs_commit_transaction+0x2c8/0xb10 [btrfs]
[ 3240.614227]  [] ? start_transaction+0x93/0x580 [btrfs]
[ 3240.614232]  [] transaction_kthread+0x1ba/0x240 [btrfs]
[ 3240.614236]  [] ?
btrfs_cleanup_transaction+0x540/0x540 [btrfs]
[ 3240.614238]  [] kthread+0xc9/0xe0
[ 3240.614239]  [] ? 

Re: kernel BUG at /linux/fs/btrfs/extent-tree.c:1833!

2015-10-11 Thread Peter Becker
I also get some of these syslog entries:

[ 3840.616538] INFO: task btrfs-transacti:11423 blocked for more than
120 seconds.
[ 3840.616541]   Tainted: G U  4.2.3-040203-generic
#201510030832
[ 3840.616542] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs"
disables this message.
[ 3840.616543] btrfs-transacti D  0 11423  2 0x
[ 3840.616545]  88007bd2bb58 0046 880215d7e600
8802121d2640
[ 3840.616547]  0246 88007bd2c000 8801aa607928
8801aa607940
[ 3840.616548]  88007bd2bba8 8801aa607920 88007bd2bb78
817a6357
[ 3840.616549] Call Trace:
[ 3840.616555]  [] schedule+0x37/0x80
[ 3840.616569]  [] btrfs_tree_lock+0x98/0x1c0 [btrfs]
[ 3840.616572]  [] ? prepare_to_wait_event+0xf0/0xf0
[ 3840.616576]  [] btrfs_search_slot+0x6e5/0x9c0 [btrfs]
[ 3840.616579]  [] ? __set_page_dirty_nobuffers+0xe7/0x140
[ 3840.616584]  [] btrfs_lookup_inode+0x2f/0xa0 [btrfs]
[ 3840.616592]  [] ?
btrfs_delete_delayed_items+0x313/0x330 [btrfs]
[ 3840.616598]  [] ?
btrfs_insert_delayed_items+0xa4/0x420 [btrfs]
[ 3840.616604]  []
__btrfs_update_delayed_inode+0x5a/0x1e0 [btrfs]
[ 3840.616609]  []
__btrfs_run_delayed_items+0x17c/0x210 [btrfs]
[ 3840.616615]  [] btrfs_run_delayed_items+0x13/0x20 [btrfs]
[ 3840.616621]  []
btrfs_commit_transaction+0x2c8/0xb10 [btrfs]
[ 3840.616626]  [] ? start_transaction+0x93/0x580 [btrfs]
[ 3840.616631]  [] transaction_kthread+0x1ba/0x240 [btrfs]
[ 3840.616636]  [] ?
btrfs_cleanup_transaction+0x540/0x540 [btrfs]
[ 3840.616637]  [] kthread+0xc9/0xe0
[ 3840.616638]  [] ? kthread_create_on_node+0x180/0x180
[ 3840.616640]  [] ret_from_fork+0x3f/0x70
[ 3840.616641]  [] ? kthread_create_on_node+0x180/0x180

Is this helpful? The filesystem is usable, but I need the new space.

2015-10-10 21:48 GMT+02:00 Peter Becker :
> btrfs balance start -m /media/RAID
>
> completes without any error, but the result of device usage is confusing me.
> Metadata on sdb and sdc are 2 GiB, but on sdd (the newly added device)
> is 4 GiB. And the second thing that's confusing me is that sdd has a
> "System" entry but sdb and sdc don't
>
> floyd@nas ~ $ sudo btrfs dev us /media/RAID/
> /dev/sdb, ID: 1
>Device size: 2.73TiB
>Data,RAID1:  2.11TiB
>Metadata,RAID1:  2.00GiB
>System,RAID1:   32.00MiB
>Unallocated:   628.49GiB
>
> /dev/sdc, ID: 2
>Device size: 2.73TiB
>Data,RAID1:  2.11TiB
>Metadata,RAID1:  2.00GiB
>Unallocated:   628.52GiB
>
> /dev/sdd, ID: 3
>Device size: 2.73TiB
>Data,RAID1:792.00GiB
>Metadata,RAID1:  4.00GiB
>System,RAID1:   32.00MiB
>Unallocated: 1.95TiB
>
> 2015-10-10 21:23 GMT+02:00 Peter Becker :
>> Hi Henk,
>>
>> I have tried it with kernel 4.1.6 and 4.2.3; btrfs progs 4.2.1 and 4.2.2
>> .. the same error.
>> System freezes after 70% of balancing.
>>
>> Scrub completes without error.
>>
>> Does someone have a hint what I can do now?
>>
>> 2015-10-09 15:52 GMT+02:00 Henk Slager :
>>> Hi Peter,
>>>
>>> I would try to add the mount option   skip_balance   for your raid1
>>> pool first, then see if you can use your system as you normally would.
>>> I assume you can live without explicit (re-)balance for some time,
>>> i.e. that the original disks are not too full.
>>>
>>> I recently did also some disks add/remove and also raid profile
>>> convert and found out that kernel 4.2.x did crash my system with
>>> various kernel bugs. So I switched back to 4.1.6 and although other
>>> bugs hit me (see https://bugzilla.kernel.org/show_bug.cgi?id=104371 )
>>> the actions I wanted did complete.
>>>
>>> Using "btrfs check --repair" has never resulted in success for me (for
>>> some root filesystems (single profiles for s m d) on real and virtual
>>> machines), so I would only use that once you have your files backed up
>>> on some other (cloned) filesystem.
>>>
>>> /Henk
>>>
>>> On Fri, Oct 9, 2015 at 9:41 AM, Peter Becker  wrote:

 First I added a new device to my btrfs raid1 pool and started a balance.
 After ~5 hours, the balance hangs and cpu-usage goes to 100% (kworker/u4
 uses all cpu-power).

 What should i do now? Run "btrfs check --repair" on all devices?

 Kernel: 4.2.3-040203-generic
 Btrfs progs v4.2.1

 Full Syslog: https://bugzilla.kernel.org/show_bug.cgi?id=105681

 From Syslog:

 [16880.495586] kernel BUG at 
 /home/kernel/COD/linux/fs/btrfs/extent-tree.c:1833!
 [16880.495603] invalid opcode:  [#1] SMP
 [16880.495614] Modules linked in: xt_nat veth xt_conntrack xt_addrtype
 br_netfilter nvram dm_thin_pool dm_persistent_data msr dm_bio_prison
 dm_bufio libcrc32c ir_lirc_codec ir_xmp_decoder lirc_dev
 ir_mce_kbd_decoder ir_sharp_decoder ir_sony_decoder ir_sanyo_decoder

Re: kernel BUG at /linux/fs/btrfs/extent-tree.c:1833!

2015-10-11 Thread Duncan
Peter Becker posted on Sat, 10 Oct 2015 21:48:31 +0200 as excerpted:

> btrfs balance start -m /media/RAID
> 
> completes without any error, but the result of device usage is confusing
> me.
> Metadata on sdb and sdc are 2 GiB, but on sdd (the newly added device)
> is 4 GiB. And the second thing that's confusing me is that sdd has a "System"
> entry but sdb and sdc don't
> 
> floyd@nas ~ $ sudo btrfs dev us /media/RAID/
> /dev/sdb, ID: 1
>Device size: 2.73TiB
>Data,RAID1:  2.11TiB
>Metadata,RAID1:  2.00GiB
>System,RAID1:   32.00MiB
>Unallocated:   628.49GiB
> 
> /dev/sdc, ID: 2
>Device size: 2.73TiB
>Data,RAID1:  2.11TiB
>Metadata,RAID1:  2.00GiB
>Unallocated:   628.52GiB
> 
> /dev/sdd, ID: 3
>Device size: 2.73TiB
>Data,RAID1:792.00GiB
>Metadata,RAID1:  4.00GiB
>System,RAID1:   32.00MiB
>Unallocated: 1.95TiB

FWIW, there's also btrfs fi usage, which prints a somewhat different 
layout of pretty much the same statistics.  It may be useful to compare 
output styles and choose the one you prefer.  I prefer fi usage to dev 
usage in most cases, but YMMV.

The key thing to remember about btrfs raid1 on more than two devices is 
that it's exactly two copies, not N copies, where N is the number of 
devices.  In a three-device raid1, by definition, for each chunk that 
will mean one copy each on two devices, with the third device not getting 
a copy of that particular chunk, since btrfs raid1 is exactly two copies, 
no more, no less.

So system is raid1, and sdb and sdd each have a copy of the (apparently 
just one) system chunk, one copy each for two copies total, leaving no 
system chunk to be placed on sdc, which is why it has none.

And, given the stats, there are 4 GiB of raid1 metadata chunks comprising 
two copies of 2 GiB worth of metadata.  Half that metadata has a copy 
each on sdb and sdd, while the other half has a copy each on sdc and sdd.  
IOW, sdd has a copy of all metadata, but sdb and sdc only have a copy of 
half the metadata each.

Since the chunk allocator creates new chunks on the device with the most 
available space, subject to the restriction that for raid1, there's two 
copies and both copies cannot be on the same device, because sdd was 
recently added and thus the one most empty, when you ran the metadata 
balance, it created one copy of the raid1 two copies on the new device as 
it had the most free space, and then had to select one of the other two 
devices for the other copy.  Since the other two devices were basically 
evenly filled, it alternated, selecting one and then the other, so each 
one got the second copy of half of the metadata, while the new device 
with the most free space got the first copy of all metadata as it was 
rewritten by the balance.

-- 
Duncan - List replies preferred.   No HTML msgs.
"Every nonfree program has a lord, a master --
and if you use the program, he is your master."  Richard Stallman

--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v5 8/9] vfs: Add vfs_copy_file_range() support for pagecache copies

2015-10-11 Thread Christoph Hellwig
On Wed, Sep 30, 2015 at 01:26:52PM -0400, Anna Schumaker wrote:
> This allows us to have an in-kernel copy mechanism that avoids frequent
> switches between kernel and user space.  This is especially useful so
> NFSD can support server-side copies.
> 
> I make pagecache copies configurable by adding three new (exclusive)
> flags:
> - COPY_FR_REFLINK tells vfs_copy_file_range() to only create a reflink.
> - COPY_FR_COPY does a full data copy, but may be filesystem accelerated.
> - COPY_FR_DEDUP creates a reflink, but only if the contents of both
>   ranges are identical.

All but FR_COPY really should be a separate system call.  Clones (and
dedup as a special case of clones) are really a separate beast from file
copies.

If I want to clone a file I either want it clone fully or fail, not copy
a certain amount.  That means that a) we need to return an error not
short "write", and b) locking implementations are important - we need to
prevent other applications from racing with our clone even if it is
large, while to get these semantics for the possible short returning
file copy will require a proper userland locking protocol. Last but not
least file copies need to be interruptible while clones should be not.
All this is already important for local file systems and even more
important for NFS exporting.

So I'd suggest to drop this patch and just let your syscall handle
actual copies with all their horrors.  We can go with Peng's patches
to generalize the btrfs ioctls for clones for now which is what everyone
already uses anyway, and then add a separate sys_file_clone later.
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v5 9/9] btrfs: btrfs_copy_file_range() only supports reflinks

2015-10-11 Thread Christoph Hellwig
On Wed, Sep 30, 2015 at 01:26:53PM -0400, Anna Schumaker wrote:
> Reject copies that don't have the COPY_FR_REFLINK flag set.

I think a reflink actually is a perfectly valid copy, and I don't buy
the duplicate arguments in earlier threads.  We really need to think
more in terms of how this impacts a user and not how it's implemented
internally.  How does a user notice it's a reflink?  They don't as
implemented in btrfs and co.  Now on filesystems that don't always do
copy on write but might support reflinks (ocfs2, XFS in the future)
this becomes a bit more interesting - the difference here is that we
get an implicit fallocate when doing a real copy.  But if that's
something we have actual requests for that's how we should specify
it rather than in terms of arcane implementation details.
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: btrfs says no errors, but booting gives lots of errors

2015-10-11 Thread covici
Duncan <1i5t5.dun...@cox.net> wrote:

> covici posted on Sat, 10 Oct 2015 19:08:16 -0400 as excerpted:
> 
> > cov...@ccs.covici.com wrote:
> > 
> >> Lionel Bouton  wrote:
> >> 
> >> > Le 10/10/2015 18:55, cov...@ccs.covici.com a écrit :
> >> > > [...]
> >> > > But do you folks have any idea about my original question, this
> >> > > leads me to think that btrfs is too new or something.
> >> > 
> >> > I've seen a recent report of a problem with btrfs-progs 4.2 confirmed
> >> > as a bug in mkfs. As you created the filesystem with it, it could be
> >> > the problem.
> > 
> > I do have 4.2.2, I could go to, would that be better?
> 
> btrfs-progs-4.2.2 does indeed have the mkfs.btrfs fixes for the bug in 
> question.  You should be fine remaking the filesystem with it.
> 
> If you created the filesystem with the buggy mkfs.btrfs, AFAIK, current 
> 4.2.2 btrfs check can detect the error, but can't fix it.  Blowing away 
> the filesystem and recreating is the only known fix at this time, and 
> filesystems created with the buggy version are not safe and could blow up 
> at any time, so it's best to be rid of them and onto something more 
> stable as soon as possible.
> 
> I can't help with the subvolumes bit, however, because while I'm on 
> gentoo/~amd64 here too, also with systemd...
> 
> I don't use subvolumes, as to me it's simply putting too many eggs in one 
> filesystem basket.  Instead, I prefer multiple separate btrfs 
> filesystems, each on their own partitions.  My / includes most of what 
> packages install, including /usr and /var but not /var/log.  It's 8 GiB 
> in size, under half used.  /home is separate, the repos tree (gentoo and 
> overlays) along with ccache, binpackages, the kernel tree, etc, are 
> together on a separate partition, /var/log is separate (and tiny, half a 
> GiB), etc.  I keep / mounted read-only by default, so have the parts of /
> var/lib that must be runtime-writable symlinked to subdirs of /home/var, 
> with /home of course mounted writable, but other than that and some /var/
> log/ subdirs, anything that's installed by a package is on /, a lesson I 
> learned the hard way when I had to recover from backups where /, /usr 
> and /var were from backups taken on different dates and thus not 
> synchronized with what portage /thought/ was installed based on /var/db/
> pkg.
> 
> Not saying that's best for you, but it's a solution that I've found works 
> very well for me, and the relative small 8 GiB size of / makes it easy to 
> have backup copies of it that I can boot, should my working / take a 
> dump.  But if it's all on the same filesystem, as it is with subvolumes, 
> and that filesystem takes a dump... it's all gone at once!  That's not 
> something I want to happen, so I vastly prefer the independent 
> filesystems, but with everything (but the limited exceptions mentioned 
> above) the package manager deals with on the same one, so it all stays 
> synced and is backed up as a single unit, which after all remains 
> reasonably small, 8 GiB, less than half used.

Thanks, in the ext4 world, I have lvm and lots of things using separate
lvm's.  I don't want to go back to partitions, if btrfs is that fragile,
maybe I should wait a while yet.  Or, I could use lvm and put btrfs on
top of that, but it seems strange to me.

-- 
Your life is like a penny.  You're going to lose it.  The question is:
How do
you spend it?

 John Covici
 cov...@ccs.covici.com
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v5 6/9] vfs: Copy should use file_out rather than file_in

2015-10-11 Thread Christoph Hellwig
On Wed, Sep 30, 2015 at 01:26:50PM -0400, Anna Schumaker wrote:
> The way to think about this is that the destination filesystem reads the
> data from the source file and processes it accordingly.  This is
> especially important to avoid an infinite loop when doing a "server to
> server" copy on NFS.

And doesn't really matter without those.  Either way this looks good
enough and should be folded.
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v5 7/9] vfs: Remove copy_file_range mountpoint checks

2015-10-11 Thread Christoph Hellwig
On Wed, Sep 30, 2015 at 01:26:51PM -0400, Anna Schumaker wrote:
> I still want to do an in-kernel copy even if the files are on different
> mountpoints, and NFS has a "server to server" copy that expects two
> files on different mountpoints.  Let's have individual filesystems
> implement this check instead.

NAK.  I think this is a bad idea in general and will only be convinced
by a properly audited actual implementation.  And even then with a flag
where the file system specifically needs to opt into this behavior instead
of getting it by default.
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v5 5/9] vfs: Copy shouldn't forbid ranges inside the same file

2015-10-11 Thread Christoph Hellwig
Needs to be folded.
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v5 4/9] vfs: Copy should check len after file open mode

2015-10-11 Thread Christoph Hellwig
Should be folded into patch 1.
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: btrfs says no errors, but booting gives lots of errors

2015-10-11 Thread Duncan
covici posted on Sat, 10 Oct 2015 19:08:16 -0400 as excerpted:

> cov...@ccs.covici.com wrote:
> 
>> Lionel Bouton  wrote:
>> 
>> > Le 10/10/2015 18:55, cov...@ccs.covici.com a écrit :
>> > > [...]
>> > > But do you folks have any idea about my original question, this
>> > > leads me to think that btrfs is too new or something.
>> > 
>> > I've seen a recent report of a problem with btrfs-progs 4.2 confirmed
>> > as a bug in mkfs. As you created the filesystem with it, it could be
>> > the problem.
> 
> I do have 4.2.2, I could go to, would that be better?

btrfs-progs-4.2.2 does indeed have the mkfs.btrfs fixes for the bug in 
question.  You should be fine remaking the filesystem with it.

If you created the filesystem with the buggy mkfs.btrfs, AFAIK, current 
4.2.2 btrfs check can detect the error, but can't fix it.  Blowing away 
the filesystem and recreating is the only known fix at this time, and 
filesystems created with the buggy version are not safe and could blow up 
at any time, so it's best to be rid of them and onto something more 
stable as soon as possible.

I can't help with the subvolumes bit, however, because while I'm on 
gentoo/~amd64 here too, also with systemd...

I don't use subvolumes, as to me it's simply putting too many eggs in one 
filesystem basket.  Instead, I prefer multiple separate btrfs 
filesystems, each on their own partitions.  My / includes most of what 
packages install, including /usr and /var but not /var/log.  It's 8 GiB 
in size, under half used.  /home is separate, the repos tree (gentoo and 
overlays) along with ccache, binpackages, the kernel tree, etc, are 
together on a separate partition, /var/log is separate (and tiny, half a 
GiB), etc.  I keep / mounted read-only by default, so have the parts of /
var/lib that must be runtime-writable symlinked to subdirs of /home/var, 
with /home of course mounted writable, but other than that and some /var/
log/ subdirs, anything that's installed by a package is on /, a lesson I 
learned the hard way when I had to recover from backups where /, /usr 
and /var were from backups taken on different dates and thus not 
synchronized with what portage /thought/ was installed based on /var/db/
pkg.

Not saying that's best for you, but it's a solution that I've found works 
very well for me, and the relative small 8 GiB size of / makes it easy to 
have backup copies of it that I can boot, should my working / take a 
dump.  But if it's all on the same filesystem, as it is with subvolumes, 
and that filesystem takes a dump... it's all gone at once!  That's not 
something I want to happen, so I vastly prefer the independent 
filesystems, but with everything (but the limited exceptions mentioned 
above) the package manager deals with on the same one, so it all stays 
synced and is backed up as a single unit, which after all remains 
reasonably small, 8 GiB, less than half used.

-- 
Duncan - List replies preferred.   No HTML msgs.
"Every nonfree program has a lord, a master --
and if you use the program, he is your master."  Richard Stallman

--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 1/7] btrfs: introduce _in_rcu variants of message printing functions

2015-10-11 Thread David Sterba
Due to the missing variants there are messages that lack the information
printed by btrfs_info etc helpers.

Signed-off-by: David Sterba 
---
 fs/btrfs/ctree.h | 29 +
 1 file changed, 29 insertions(+)

diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 938efe33be80..88acdffbe384 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -4039,14 +4039,43 @@ void btrfs_printk(const struct btrfs_fs_info *fs_info, 
const char *fmt, ...)
 #define btrfs_info(fs_info, fmt, args...) \
btrfs_printk(fs_info, KERN_INFO fmt, ##args)
 
+/*
+ * Wrappers that use printk_in_rcu
+ */
+#define btrfs_emerg_in_rcu(fs_info, fmt, args...) \
+   btrfs_printk_in_rcu(fs_info, KERN_EMERG fmt, ##args)
+#define btrfs_alert_in_rcu(fs_info, fmt, args...) \
+   btrfs_printk_in_rcu(fs_info, KERN_ALERT fmt, ##args)
+#define btrfs_crit_in_rcu(fs_info, fmt, args...) \
+   btrfs_printk_in_rcu(fs_info, KERN_CRIT fmt, ##args)
+#define btrfs_err_in_rcu(fs_info, fmt, args...) \
+   btrfs_printk_in_rcu(fs_info, KERN_ERR fmt, ##args)
+#define btrfs_warn_in_rcu(fs_info, fmt, args...) \
+   btrfs_printk_in_rcu(fs_info, KERN_WARNING fmt, ##args)
+#define btrfs_notice_in_rcu(fs_info, fmt, args...) \
+   btrfs_printk_in_rcu(fs_info, KERN_NOTICE fmt, ##args)
+#define btrfs_info_in_rcu(fs_info, fmt, args...) \
+   btrfs_printk_in_rcu(fs_info, KERN_INFO fmt, ##args)
+
 #ifdef DEBUG
 #define btrfs_debug(fs_info, fmt, args...) \
btrfs_printk(fs_info, KERN_DEBUG fmt, ##args)
+#define btrfs_debug_in_rcu(fs_info, fmt, args...) \
+   btrfs_printk_in_rcu(fs_info, KERN_DEBUG fmt, ##args)
 #else
 #define btrfs_debug(fs_info, fmt, args...) \
 no_printk(KERN_DEBUG fmt, ##args)
+#define btrfs_debug_in_rcu(fs_info, fmt, args...) \
+   no_printk(KERN_DEBUG fmt, ##args)
 #endif
 
+#define btrfs_printk_in_rcu(fs_info, fmt, args...) \
+do {   \
+   rcu_read_lock();\
+   btrfs_printk(fs_info, fmt, ##args); \
+   rcu_read_unlock();  \
+} while (0)
+
 #ifdef CONFIG_BTRFS_ASSERT
 
 __cold
-- 
2.1.3

--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 6/7] btrfs: switch message printers to ratelimited variants

2015-10-11 Thread David Sterba
Signed-off-by: David Sterba 
---
 fs/btrfs/disk-io.c  | 21 ++---
 fs/btrfs/extent-tree.c  | 14 --
 fs/btrfs/extent_io.c|  4 ++--
 fs/btrfs/free-space-cache.c | 10 +-
 fs/btrfs/inode.c|  7 ++-
 fs/btrfs/scrub.c|  8 
 6 files changed, 31 insertions(+), 33 deletions(-)

diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index ba41faf623ce..72553cd9ed14 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -319,9 +319,9 @@ static int csum_tree_block(struct btrfs_fs_info *fs_info,
memcpy(, result, csum_size);
 
read_extent_buffer(buf, , 0, csum_size);
-   printk_ratelimited(KERN_WARNING
-   "BTRFS: %s checksum verify failed on %llu 
wanted %X found %X "
-   "level %d\n",
+   btrfs_warn_rl(fs_info,
+   "%s checksum verify failed on %llu wanted %X 
found %X "
+   "level %d",
fs_info->sb->s_id, buf->start,
val, found, btrfs_header_level(buf));
if (result != (char *)_result)
@@ -368,9 +368,9 @@ static int verify_parent_transid(struct extent_io_tree 
*io_tree,
ret = 0;
goto out;
}
-   printk_ratelimited(KERN_ERR
-   "BTRFS (device %s): parent transid verify failed on %llu wanted 
%llu found %llu\n",
-   eb->fs_info->sb->s_id, eb->start,
+   btrfs_err_rl(eb->fs_info,
+   "parent transid verify failed on %llu wanted %llu found %llu",
+   eb->start,
parent_transid, btrfs_header_generation(eb));
ret = 1;
 
@@ -629,15 +629,14 @@ static int btree_readpage_end_io_hook(struct btrfs_io_bio 
*io_bio,
 
found_start = btrfs_header_bytenr(eb);
if (found_start != eb->start) {
-   printk_ratelimited(KERN_ERR "BTRFS (device %s): bad tree block 
start "
-  "%llu %llu\n",
-  eb->fs_info->sb->s_id, found_start, eb->start);
+   btrfs_err_rl(eb->fs_info, "bad tree block start %llu %llu",
+  found_start, eb->start);
ret = -EIO;
goto err;
}
if (check_tree_block_fsid(root->fs_info, eb)) {
-   printk_ratelimited(KERN_ERR "BTRFS (device %s): bad fsid on 
block %llu\n",
-  eb->fs_info->sb->s_id, eb->start);
+   btrfs_err_rl(eb->fs_info, "bad fsid on block %llu",
+  eb->start);
ret = -EIO;
goto err;
}
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 9f9604201333..913a5836e604 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -8271,10 +8271,11 @@ static noinline int do_walk_down(struct 
btrfs_trans_handle *trans,
ret = account_shared_subtree(trans, root, next,
 generation, level - 1);
if (ret) {
-   printk_ratelimited(KERN_ERR "BTRFS: %s Error "
+   btrfs_err_rl(root->fs_info,
+   "Error "
"%d accounting shared subtree. Quota "
-   "is out of sync, rescan required.\n",
-   root->fs_info->sb->s_id, ret);
+   "is out of sync, rescan required.",
+   ret);
}
}
ret = btrfs_free_extent(trans, root, bytenr, blocksize, parent,
@@ -8363,10 +8364,11 @@ static noinline int walk_up_proc(struct 
btrfs_trans_handle *trans,
BUG_ON(ret); /* -ENOMEM */
ret = account_leaf_items(trans, root, eb);
if (ret) {
-   printk_ratelimited(KERN_ERR "BTRFS: %s Error "
+   btrfs_err_rl(root->fs_info,
+   "error "
"%d accounting leaf items. Quota "
-   "is out of sync, rescan required.\n",
-   root->fs_info->sb->s_id, ret);
+   "is out of sync, rescan required.",
+   ret);
}
}
/* make block locked assertion in clean_tree_block happy */
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 58b9ccdfe2e9..bb318b4e5dd0 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -96,8 +96,8 

[PATCH 7/7] btrfs: switch more printks to our helpers

2015-10-11 Thread David Sterba
Convert the simple cases, not all functions provide a way to reach the
fs_info. Also skipped debugging messages (print-tree, integrity
checker and pr_debug) and messages that are printed from possibly
unfinished mount.

Signed-off-by: David Sterba 
---
 fs/btrfs/disk-io.c   | 12 ++--
 fs/btrfs/extent_io.c | 18 ++
 fs/btrfs/ioctl.c |  6 +++---
 fs/btrfs/root-tree.c |  5 +++--
 fs/btrfs/send.c  |  2 +-
 fs/btrfs/volumes.c   |  3 ++-
 6 files changed, 25 insertions(+), 21 deletions(-)

diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 72553cd9ed14..5bc94d2d5699 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -2347,8 +2347,7 @@ static int btrfs_replay_log(struct btrfs_fs_info *fs_info,
u64 bytenr = btrfs_super_log_root(disk_super);
 
if (fs_devices->rw_devices == 0) {
-   printk(KERN_WARNING "BTRFS: log replay required "
-  "on RO media\n");
+   btrfs_warn(fs_info, "log replay required on RO media");
return -EIO;
}
 
@@ -2363,12 +2362,12 @@ static int btrfs_replay_log(struct btrfs_fs_info 
*fs_info,
log_tree_root->node = read_tree_block(tree_root, bytenr,
fs_info->generation + 1);
if (IS_ERR(log_tree_root->node)) {
-   printk(KERN_ERR "BTRFS: failed to read log tree\n");
+   btrfs_warn(fs_info, "failed to read log tree");
ret = PTR_ERR(log_tree_root->node);
kfree(log_tree_root);
return ret;
} else if (!extent_buffer_uptodate(log_tree_root->node)) {
-   printk(KERN_ERR "BTRFS: failed to read log tree\n");
+   btrfs_err(fs_info, "failed to read log tree");
free_extent_buffer(log_tree_root->node);
kfree(log_tree_root);
return -EIO;
@@ -3294,8 +3293,9 @@ static int write_dev_supers(struct btrfs_device *device,
bh = __getblk(device->bdev, bytenr / 4096,
  BTRFS_SUPER_INFO_SIZE);
if (!bh) {
-   printk(KERN_ERR "BTRFS: couldn't get super "
-  "buffer head for bytenr %Lu\n", bytenr);
+   btrfs_err(device->dev_root->fs_info,
+   "couldn't get super buffer head for bytenr 
%llu",
+   bytenr);
errors++;
continue;
}
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index bb318b4e5dd0..c7a33913188b 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -5563,13 +5563,15 @@ void memcpy_extent_buffer(struct extent_buffer *dst, 
unsigned long dst_offset,
unsigned long src_i;
 
if (src_offset + len > dst->len) {
-   printk(KERN_ERR "BTRFS: memmove bogus src_offset %lu move "
-  "len %lu dst len %lu\n", src_offset, len, dst->len);
+   btrfs_err(dst->fs_info,
+   "memmove bogus src_offset %lu move "
+  "len %lu dst len %lu", src_offset, len, dst->len);
BUG_ON(1);
}
if (dst_offset + len > dst->len) {
-   printk(KERN_ERR "BTRFS: memmove bogus dst_offset %lu move "
-  "len %lu dst len %lu\n", dst_offset, len, dst->len);
+   btrfs_err(dst->fs_info,
+   "memmove bogus dst_offset %lu move "
+  "len %lu dst len %lu", dst_offset, len, dst->len);
BUG_ON(1);
}
 
@@ -5609,13 +5611,13 @@ void memmove_extent_buffer(struct extent_buffer *dst, 
unsigned long dst_offset,
unsigned long src_i;
 
if (src_offset + len > dst->len) {
-   printk(KERN_ERR "BTRFS: memmove bogus src_offset %lu move "
-  "len %lu len %lu\n", src_offset, len, dst->len);
+   btrfs_err(dst->fs_info, "memmove bogus src_offset %lu move "
+  "len %lu len %lu", src_offset, len, dst->len);
BUG_ON(1);
}
if (dst_offset + len > dst->len) {
-   printk(KERN_ERR "BTRFS: memmove bogus dst_offset %lu move "
-  "len %lu len %lu\n", dst_offset, len, dst->len);
+   btrfs_err(dst->fs_info, "memmove bogus dst_offset %lu move "
+  "len %lu len %lu", dst_offset, len, dst->len);
BUG_ON(1);
}
if (dst_offset < src_offset) {
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 8e9105af723e..2e520c635709 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -1342,7 +1342,7 @@ int btrfs_defrag_file(struct inode *inode, struct file 
*file,
break;
 
if (btrfs_defrag_cancelled(root->fs_info)) {
-   printk(KERN_DEBUG 

[PATCH 2/7] btrfs: switch message printers to _in_rcu variants

2015-10-11 Thread David Sterba
Signed-off-by: David Sterba 
---
 fs/btrfs/check-integrity.c |  4 ++--
 fs/btrfs/dev-replace.c | 16 
 fs/btrfs/ioctl.c   |  2 +-
 fs/btrfs/scrub.c   | 14 +++---
 fs/btrfs/volumes.c | 18 +-
 5 files changed, 27 insertions(+), 27 deletions(-)

diff --git a/fs/btrfs/check-integrity.c b/fs/btrfs/check-integrity.c
index 541fbfaed276..3e20630db8aa 100644
--- a/fs/btrfs/check-integrity.c
+++ b/fs/btrfs/check-integrity.c
@@ -845,8 +845,8 @@ static int btrfsic_process_superblock_dev_mirror(
superblock_tmp->never_written = 0;
superblock_tmp->mirror_num = 1 + superblock_mirror_num;
if (state->print_mask & BTRFSIC_PRINT_MASK_SUPERBLOCK_WRITE)
-   printk_in_rcu(KERN_INFO "New initial S-block (bdev %p, 
%s)"
-" @%llu (%s/%llu/%d)\n",
+   btrfs_info_in_rcu(device->dev_root->fs_info,
+   "new initial S-block (bdev %p, %s) @%llu 
(%s/%llu/%d)",
 superblock_bdev,
 rcu_str_deref(device->name), dev_bytenr,
 dev_state->name, dev_bytenr,
diff --git a/fs/btrfs/dev-replace.c b/fs/btrfs/dev-replace.c
index e54dd5905cee..ede1c1ad0baf 100644
--- a/fs/btrfs/dev-replace.c
+++ b/fs/btrfs/dev-replace.c
@@ -379,8 +379,8 @@ int btrfs_dev_replace_start(struct btrfs_root *root,
if (ret)
btrfs_err(root->fs_info, "kobj add dev failed %d\n", ret);
 
-   printk_in_rcu(KERN_INFO
- "BTRFS: dev_replace from %s (devid %llu) to %s started\n",
+   btrfs_info_in_rcu(root->fs_info,
+ "dev_replace from %s (devid %llu) to %s started",
  src_device->missing ? "" :
rcu_str_deref(src_device->name),
  src_device->devid,
@@ -523,8 +523,8 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info 
*fs_info,
src_device,
tgt_device);
} else {
-   printk_in_rcu(KERN_ERR
- "BTRFS: btrfs_scrub_dev(%s, %llu, %s) failed 
%d\n",
+   btrfs_err_in_rcu(root->fs_info,
+ "btrfs_scrub_dev(%s, %llu, %s) failed %d",
  src_device->missing ? "" :
rcu_str_deref(src_device->name),
  src_device->devid,
@@ -540,8 +540,8 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info 
*fs_info,
return scrub_ret;
}
 
-   printk_in_rcu(KERN_INFO
- "BTRFS: dev_replace from %s (devid %llu) to %s 
finished\n",
+   btrfs_info_in_rcu(root->fs_info,
+ "dev_replace from %s (devid %llu) to %s finished",
  src_device->missing ? "" :
rcu_str_deref(src_device->name),
  src_device->devid,
@@ -809,8 +809,8 @@ static int btrfs_dev_replace_kthread(void *data)
progress = status_args->status.progress_1000;
kfree(status_args);
progress = div_u64(progress, 10);
-   printk_in_rcu(KERN_INFO
-   "BTRFS: continuing dev_replace from %s (devid %llu) to 
%s @%u%%\n",
+   btrfs_info_in_rcu(fs_info,
+   "continuing dev_replace from %s (devid %llu) to %s 
@%u%%",
dev_replace->srcdev->missing ? "" :
rcu_str_deref(dev_replace->srcdev->name),
dev_replace->srcdev->devid,
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 0adf5422fce9..8e9105af723e 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -1579,7 +1579,7 @@ static noinline int btrfs_ioctl_resize(struct file *file,
new_size = div_u64(new_size, root->sectorsize);
new_size *= root->sectorsize;
 
-   printk_in_rcu(KERN_INFO "BTRFS: new size for %s is %llu\n",
+   btrfs_info_in_rcu(root->fs_info, "new size for %s is %llu",
  rcu_str_deref(device->name), new_size);
 
if (new_size > old_size) {
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
index a39f5d1144e8..26cfbb0b867c 100644
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c
@@ -580,9 +580,9 @@ static int scrub_print_warning_inode(u64 inum, u64 offset, 
u64 root,
 * hold all of the paths here
 */
for (i = 0; i < ipath->fspath->elem_cnt; ++i)
-   printk_in_rcu(KERN_WARNING "BTRFS: %s at logical %llu on dev "
+   btrfs_warn_in_rcu(fs_info, "%s at logical %llu on dev "
"%s, sector %llu, root %llu, inode %llu, offset %llu, "
-   "length %llu, links %u (path: %s)\n", 

[PATCH 4/7] btrfs: switch message printers to ratelimited _in_rcu variants

2015-10-11 Thread David Sterba
Signed-off-by: David Sterba 
---
 fs/btrfs/disk-io.c   |  4 ++--
 fs/btrfs/extent_io.c |  4 ++--
 fs/btrfs/scrub.c | 20 ++--
 fs/btrfs/volumes.c   |  4 ++--
 4 files changed, 16 insertions(+), 16 deletions(-)

diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 295795aebe0b..ba41faf623ce 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -3175,8 +3175,8 @@ static void btrfs_end_buffer_write_sync(struct 
buffer_head *bh, int uptodate)
struct btrfs_device *device = (struct btrfs_device *)
bh->b_private;
 
-   printk_ratelimited_in_rcu(KERN_WARNING "BTRFS: lost page write 
due to "
- "I/O error on %s\n",
+   btrfs_warn_rl_in_rcu(device->dev_root->fs_info,
+   "lost page write due to IO error on %s",
  rcu_str_deref(device->name));
/* note, we dont' set_buffer_write_io_error because we have
 * our own ways of dealing with the IO errors
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index e2357e31609a..58b9ccdfe2e9 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -2078,8 +2078,8 @@ int repair_io_failure(struct inode *inode, u64 start, u64 
length, u64 logical,
return -EIO;
}
 
-   printk_ratelimited_in_rcu(KERN_INFO
- "BTRFS: read error corrected: ino %llu off 
%llu (dev %s sector %llu)\n",
+   btrfs_info_rl_in_rcu(fs_info,
+   "read error corrected: ino %llu off %llu (dev %s sector %llu)",
  btrfs_ino(inode), start,
  rcu_str_deref(dev->name), sector);
bio_put(bio);
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
index 26cfbb0b867c..bce86f8772e1 100644
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c
@@ -850,8 +850,8 @@ static void scrub_fixup_nodatasum(struct btrfs_work *work)
btrfs_dev_replace_stats_inc(
>dev_root->fs_info->dev_replace.
num_uncorrectable_read_errors);
-   printk_ratelimited_in_rcu(KERN_ERR "BTRFS: "
-   "unable to fixup (nodatasum) error at logical %llu on dev 
%s\n",
+   btrfs_err_rl_in_rcu(sctx->dev_root->fs_info,
+   "unable to fixup (nodatasum) error at logical %llu on dev 
%s",
fixup->logical, rcu_str_deref(fixup->dev->name));
}
 
@@ -1230,8 +1230,8 @@ static int scrub_handle_errored_block(struct scrub_block 
*sblock_to_check)
sctx->stat.corrected_errors++;
sblock_to_check->data_corrected = 1;
spin_unlock(>stat_lock);
-   printk_ratelimited_in_rcu(KERN_ERR
-   "BTRFS: fixed up error at logical %llu on dev 
%s\n",
+   btrfs_err_rl_in_rcu(fs_info,
+   "fixed up error at logical %llu on dev %s",
logical, rcu_str_deref(dev->name));
}
} else {
@@ -1239,8 +1239,8 @@ static int scrub_handle_errored_block(struct scrub_block 
*sblock_to_check)
spin_lock(>stat_lock);
sctx->stat.uncorrectable_errors++;
spin_unlock(>stat_lock);
-   printk_ratelimited_in_rcu(KERN_ERR
-   "BTRFS: unable to fixup (regular) error at logical %llu 
on dev %s\n",
+   btrfs_err_rl_in_rcu(fs_info,
+   "unable to fixup (regular) error at logical %llu on dev 
%s",
logical, rcu_str_deref(dev->name));
}
 
@@ -2201,15 +2201,15 @@ static void scrub_missing_raid56_worker(struct 
btrfs_work *work)
spin_lock(>stat_lock);
sctx->stat.read_errors++;
spin_unlock(>stat_lock);
-   printk_ratelimited_in_rcu(KERN_ERR
-   "BTRFS: I/O error rebulding logical %llu for dev %s\n",
+   btrfs_err_rl_in_rcu(fs_info,
+   "IO error rebuilding logical %llu for dev %s",
logical, rcu_str_deref(dev->name));
} else if (sblock->header_error || sblock->checksum_error) {
spin_lock(>stat_lock);
sctx->stat.uncorrectable_errors++;
spin_unlock(>stat_lock);
-   printk_ratelimited_in_rcu(KERN_ERR
-   "BTRFS: failed to rebuild valid logical %llu for dev 
%s\n",
+   btrfs_err_rl_in_rcu(fs_info,
+   "failed to rebuild valid logical %llu for dev %s",
logical, rcu_str_deref(dev->name));
} else {
scrub_write_block_to_dev_replace(sblock);
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index b0dae14242e8..4f80986dbf3c 100644
--- 

[PATCH 5/7] btrfs: introduce ratelimited variants of message printing functions

2015-10-11 Thread David Sterba
Signed-off-by: David Sterba 
---
 fs/btrfs/ctree.h | 21 +
 1 file changed, 21 insertions(+)

diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 9c0fd901edbe..bca42c5733a1 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -4075,6 +4075,23 @@ void btrfs_printk(const struct btrfs_fs_info *fs_info, 
const char *fmt, ...)
 #define btrfs_info_rl_in_rcu(fs_info, fmt, args...) \
btrfs_printk_rl_in_rcu(fs_info, KERN_INFO fmt, ##args)
 
+/*
+ * Wrappers that use a ratelimited printk
+ */
+#define btrfs_emerg_rl(fs_info, fmt, args...) \
+   btrfs_printk_ratelimited(fs_info, KERN_EMERG fmt, ##args)
+#define btrfs_alert_rl(fs_info, fmt, args...) \
+   btrfs_printk_ratelimited(fs_info, KERN_ALERT fmt, ##args)
+#define btrfs_crit_rl(fs_info, fmt, args...) \
+   btrfs_printk_ratelimited(fs_info, KERN_CRIT fmt, ##args)
+#define btrfs_err_rl(fs_info, fmt, args...) \
+   btrfs_printk_ratelimited(fs_info, KERN_ERR fmt, ##args)
+#define btrfs_warn_rl(fs_info, fmt, args...) \
+   btrfs_printk_ratelimited(fs_info, KERN_WARNING fmt, ##args)
+#define btrfs_notice_rl(fs_info, fmt, args...) \
+   btrfs_printk_ratelimited(fs_info, KERN_NOTICE fmt, ##args)
+#define btrfs_info_rl(fs_info, fmt, args...) \
+   btrfs_printk_ratelimited(fs_info, KERN_INFO fmt, ##args)
 #ifdef DEBUG
 #define btrfs_debug(fs_info, fmt, args...) \
btrfs_printk(fs_info, KERN_DEBUG fmt, ##args)
@@ -4082,6 +4099,8 @@ void btrfs_printk(const struct btrfs_fs_info *fs_info, 
const char *fmt, ...)
btrfs_printk_in_rcu(fs_info, KERN_DEBUG fmt, ##args)
 #define btrfs_debug_rl_in_rcu(fs_info, fmt, args...) \
btrfs_printk_rl_in_rcu(fs_info, KERN_DEBUG fmt, ##args)
+#define btrfs_debug_rl(fs_info, fmt, args...) \
+   btrfs_printk_ratelimited(fs_info, KERN_DEBUG fmt, ##args)
 #else
 #define btrfs_debug(fs_info, fmt, args...) \
 no_printk(KERN_DEBUG fmt, ##args)
@@ -4089,6 +4108,8 @@ void btrfs_printk(const struct btrfs_fs_info *fs_info, 
const char *fmt, ...)
no_printk(KERN_DEBUG fmt, ##args)
 #define btrfs_debug_rl_in_rcu(fs_info, fmt, args...) \
no_printk(KERN_DEBUG fmt, ##args)
+#define btrfs_debug_rl(fs_info, fmt, args...) \
+   no_printk(KERN_DEBUG fmt, ##args)
 #endif
 
 #define btrfs_printk_in_rcu(fs_info, fmt, args...) \
-- 
2.1.3

--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PULL][PATCH 0/7] Updates in message printers and formatters

2015-10-11 Thread David Sterba
Hi,

this was inspired by Anand's recent post to change the printing format of the
btrfs_err etc. helpers to include UUID. We do not use the helpers everywhere,
so I've introduced all the missing variants that require e.g. printing the
rcu_string (device name), are ratelimited or are both ratelimited and using
rcu_string.

First part introduces macros and switches existing users, the patch 7/7 converts
some printks to the helpers (avoiding mostly debugging messages).

In the end we should route all user-visible messages through some common
formatter, this is preparatory work. The mount-time messages are still
printk-based.

Please consider for the 4.4 dev cycle. Thanks.



The following changes since commit 9ffecb10283508260936b96022d4ee43a7798b4c:

  Linux 4.3-rc3 (2015-09-27 07:50:08 -0400)

are available in the git repository at:

  git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux.git cleanup/messages

for you to fetch changes up to f14d104dbdb5044dac9acd0e983ffb60f706c746:

  btrfs: switch more printks to our helpers (2015-10-08 13:08:03 +0200)


David Sterba (7):
  btrfs: introduce _in_rcu variants of message printing functions
  btrfs: switch message printers to _in_rcu variants
  btrfs: introduce ratelimited _in_rcu variants of message printing 
functions
  btrfs: switch message printers to ratelimited _in_rcu variants
  btrfs: introduce ratelimited variants of message printing functions
  btrfs: switch message printers to ratelimited variants
  btrfs: switch more printks to our helpers

 fs/btrfs/check-integrity.c  |  4 +--
 fs/btrfs/ctree.h| 88 +
 fs/btrfs/dev-replace.c  | 16 -
 fs/btrfs/disk-io.c  | 37 ++-
 fs/btrfs/extent-tree.c  | 14 
 fs/btrfs/extent_io.c| 26 +++---
 fs/btrfs/free-space-cache.c | 10 +++---
 fs/btrfs/inode.c|  7 ++--
 fs/btrfs/ioctl.c|  8 ++---
 fs/btrfs/root-tree.c|  5 +--
 fs/btrfs/scrub.c| 42 +++---
 fs/btrfs/send.c |  2 +-
 fs/btrfs/volumes.c  | 25 ++---
 13 files changed, 187 insertions(+), 97 deletions(-)
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 3/7] btrfs: introduce ratelimited _in_rcu variants of message printing functions

2015-10-11 Thread David Sterba
Signed-off-by: David Sterba 
---
 fs/btrfs/ctree.h | 38 ++
 1 file changed, 38 insertions(+)

diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 88acdffbe384..9c0fd901edbe 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -4057,16 +4057,38 @@ void btrfs_printk(const struct btrfs_fs_info *fs_info, 
const char *fmt, ...)
 #define btrfs_info_in_rcu(fs_info, fmt, args...) \
btrfs_printk_in_rcu(fs_info, KERN_INFO fmt, ##args)
 
+/*
+ * Wrappers that use a ratelimited printk_in_rcu
+ */
+#define btrfs_emerg_rl_in_rcu(fs_info, fmt, args...) \
+   btrfs_printk_rl_in_rcu(fs_info, KERN_EMERG fmt, ##args)
+#define btrfs_alert_rl_in_rcu(fs_info, fmt, args...) \
+   btrfs_printk_rl_in_rcu(fs_info, KERN_ALERT fmt, ##args)
+#define btrfs_crit_rl_in_rcu(fs_info, fmt, args...) \
+   btrfs_printk_rl_in_rcu(fs_info, KERN_CRIT fmt, ##args)
+#define btrfs_err_rl_in_rcu(fs_info, fmt, args...) \
+   btrfs_printk_rl_in_rcu(fs_info, KERN_ERR fmt, ##args)
+#define btrfs_warn_rl_in_rcu(fs_info, fmt, args...) \
+   btrfs_printk_rl_in_rcu(fs_info, KERN_WARNING fmt, ##args)
+#define btrfs_notice_rl_in_rcu(fs_info, fmt, args...) \
+   btrfs_printk_rl_in_rcu(fs_info, KERN_NOTICE fmt, ##args)
+#define btrfs_info_rl_in_rcu(fs_info, fmt, args...) \
+   btrfs_printk_rl_in_rcu(fs_info, KERN_INFO fmt, ##args)
+
 #ifdef DEBUG
 #define btrfs_debug(fs_info, fmt, args...) \
btrfs_printk(fs_info, KERN_DEBUG fmt, ##args)
 #define btrfs_debug_in_rcu(fs_info, fmt, args...) \
btrfs_printk_in_rcu(fs_info, KERN_DEBUG fmt, ##args)
+#define btrfs_debug_rl_in_rcu(fs_info, fmt, args...) \
+   btrfs_printk_rl_in_rcu(fs_info, KERN_DEBUG fmt, ##args)
 #else
 #define btrfs_debug(fs_info, fmt, args...) \
 no_printk(KERN_DEBUG fmt, ##args)
 #define btrfs_debug_in_rcu(fs_info, fmt, args...) \
no_printk(KERN_DEBUG fmt, ##args)
+#define btrfs_debug_rl_in_rcu(fs_info, fmt, args...) \
+   no_printk(KERN_DEBUG fmt, ##args)
 #endif
 
 #define btrfs_printk_in_rcu(fs_info, fmt, args...) \
@@ -4076,6 +4098,22 @@ do { 
\
rcu_read_unlock();  \
 } while (0)
 
+#define btrfs_printk_ratelimited(fs_info, fmt, args...)\
+do {   \
+   static DEFINE_RATELIMIT_STATE(_rs,  \
+   DEFAULT_RATELIMIT_INTERVAL, \
+   DEFAULT_RATELIMIT_BURST);   \
+   if (__ratelimit(&_rs))  \
+   btrfs_printk(fs_info, fmt, ##args); \
+} while (0)
+
+#define btrfs_printk_rl_in_rcu(fs_info, fmt, args...)  \
+do {   \
+   rcu_read_lock();\
+   btrfs_printk_ratelimited(fs_info, fmt, ##args); \
+   rcu_read_unlock();  \
+} while (0)
+
 #ifdef CONFIG_BTRFS_ASSERT
 
 __cold
-- 
2.1.3

--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] btrfs: clear bio reference after submit_one_bio()

2015-10-11 Thread Alex Lyakas
Hi Naota,

What happens if btrfs_bio_alloc() in submit_extent_page fails? Then we
return -ENOMEM to the caller, but we do not set *bio_ret to NULL. And
if *bio_ret was non-NULL upon entry into submit_extent_page, then we
had submitted this bio before getting to btrfs_bio_alloc(). So should
btrfs_bio_alloc() failure be handled in the same way?

diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 3915c94..cd443bc 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -2834,8 +2834,11 @@ static int submit_extent_page(int rw, struct
extent_io_tree *tree,

bio = btrfs_bio_alloc(bdev, sector, BIO_MAX_PAGES,
GFP_NOFS | __GFP_HIGH);
-   if (!bio)
+   if (!bio) {
+   if (bio_ret)
+   *bio_ret = NULL;
return -ENOMEM;
+   }

bio_add_page(bio, page, page_size, offset);
bio->bi_end_io = end_io_func;


Thanks,
Alex.

On Wed, Jan 7, 2015 at 12:46 AM, Satoru Takeuchi
 wrote:
> Hi Naota,
>
> On 2015/01/06 1:01, Naohiro Aota wrote:
>> After submit_one_bio(), `bio' can go away. However submit_extent_page()
>> leave `bio' referable if submit_one_bio() failed (e.g. -ENOMEM on OOM).
>> It will cause invalid paging request when submit_extent_page() is called
>> next time.
>>
>> I reproduced ENOMEM case with the following script (need
>> CONFIG_FAIL_PAGE_ALLOC, and CONFIG_FAULT_INJECTION_DEBUG_FS).
>
> I confirmed that this problem reproduces with 3.19-rc3 and
> does not reproduce with 3.19-rc3 with your patch.
>
> Tested-by: Satoru Takeuchi 
>
> Thank you for reporting this problem with the reproducer
> and fixing it too.
>
>   NOTE:
>   I used v3.19-rc3's tools/testing/fault-injection/failcmd.sh
>   for the following "./failcmd.sh".
>
>   >./failcmd.sh -p $percent -t $times -i $interval \
>   >--ignore-gfp-highmem=N --ignore-gfp-wait=N 
> --min-order=0 \
>   >-- \
>   >cat $directory/file > /dev/null
>
> * 3.19-rc1 + your patch
>
> ===
> # ./run
> 512+0 records in
> 512+0 records out
> #
> ===
>
> * 3.19-rc3
>
> ===
> # ./run
> 512+0 records in
> 512+0 records out
> [  188.433726] run (776): drop_caches: 1
> [  188.682372] FAULT_INJECTION: forcing a failure.
> name fail_page_alloc, interval 100, probability 111000, space 0, times 3
> [  188.689986] CPU: 0 PID: 954 Comm: cat Not tainted 3.19.0-rc3-ktest #1
> [  188.693834] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 
> Bochs 01/01/2011
> [  188.698466]  0064 88007b343618 816e5563 
> 88007fc0fc78
> [  188.702730]  81c655c0 88007b343638 813851b5 
> 0010
> [  188.707043]  0002 88007b343768 81188126 
> 88007b3435a8
> [  188.711283] Call Trace:
> [  188.712620]  [] dump_stack+0x45/0x57
> [  188.715330]  [] should_fail+0x135/0x140
> [  188.718218]  [] __alloc_pages_nodemask+0xd6/0xb30
> [  188.721567]  [] ? blk_rq_map_sg+0x35/0x170
> [  188.724558]  [] ? virtio_queue_rq+0x145/0x2b0 
> [virtio_blk]
> [  188.728191]  [] ? 
> btrfs_submit_compressed_read+0xcf/0x4d0 [btrfs]
> [  188.732079]  [] ? kmem_cache_alloc+0x1cb/0x230
> [  188.735153]  [] ? mempool_alloc_slab+0x15/0x20
> [  188.738188]  [] alloc_pages_current+0x9a/0x120
> [  188.741153]  [] btrfs_submit_compressed_read+0x1a9/0x4d0 
> [btrfs]
> [  188.744835]  [] btrfs_submit_bio_hook+0x1c1/0x1d0 [btrfs]
> [  188.748225]  [] ? lookup_extent_mapping+0x13/0x20 [btrfs]
> [  188.751547]  [] ? btrfs_get_extent+0x98/0xad0 [btrfs]
> [  188.754656]  [] submit_one_bio+0x67/0xa0 [btrfs]
> [  188.757554]  [] submit_extent_page.isra.35+0xd7/0x1c0 
> [btrfs]
> [  188.760981]  [] __do_readpage+0x31d/0x7b0 [btrfs]
> [  188.763920]  [] ? btrfs_create_repair_bio+0x110/0x110 
> [btrfs]
> [  188.767382]  [] ? btrfs_submit_direct+0x7b0/0x7b0 [btrfs]
> [  188.770671]  [] ? btrfs_lookup_ordered_range+0x13d/0x180 
> [btrfs]
> [  188.774366]  [] 
> __extent_readpages.constprop.42+0x2ba/0x2d0 [btrfs]
> [  188.778031]  [] ? btrfs_submit_direct+0x7b0/0x7b0 [btrfs]
> [  188.781241]  [] extent_readpages+0x169/0x1b0 [btrfs]
> [  188.784322]  [] ? btrfs_submit_direct+0x7b0/0x7b0 [btrfs]
> [  188.789014]  [] btrfs_readpages+0x1f/0x30 [btrfs]
> [  188.792028]  [] __do_page_cache_readahead+0x18c/0x1f0
> [  188.795078]  [] ondemand_readahead+0xdf/0x260
> [  188.797702]  [] ? btrfs_congested_fn+0x5f/0xa0 [btrfs]
> [  188.800718]  [] page_cache_async_readahead+0x71/0xa0
> [  188.803650]  [] generic_file_read_iter+0x40f/0x5e0
> [  188.806480]  [] new_sync_read+0x7e/0xb0
> [  188.808832]  [] __vfs_read+0x18/0x50
> [  188.811068]  [] vfs_read+0x8a/0x140
> [  188.813298]  [] SyS_read+0x46/0xb0

filesystem goes ro trying to balance. "cpu stuck"

2015-10-11 Thread Donald Pearson
Kernel 4.2.2-1.el7.elrepo
btrfs-progs v4.2.1

I'm attempting to convert a filesystem from raid6 to raid10.  I didn't
have any functional problems with it, but performance is abysmal
compared to basically the same arrangement in raid10 so I thought I'd
just get away from raid56 for a while (I also saw something about
parity raid code developed beyond 2-disk parity that was
ignored/thrown away so I'm thinking the devs don't care much about
parity raid at least for now).

Partway through the balance something goes wrong and filesystem is
forced read-only stopping the balance.

I did an fsck and it didn't complain about/find any errors.  The
drives aren't throwing any errors or incrementing any smart
attributes.  This is a backup array, so it's not the end of the world
if I have to just blow it away and rebuild as raid10 from scratch.

The console prints this error.
NMI watchdog: BUG: soft lockup - CPU#0 stuck for 22s! [btrfs-balance:8015]

Here's the fun stuff out of dmesg

[183120.853367] INFO: rcu_sched self-detected stall on CPU { 0}
(t=7620235 jiffies g=3046202 c=3046201 q=0)
[183120.856391] INFO: rcu_sched detected stalls on CPUs/tasks: { 0}
(detected by 3, t=7620238 jiffies, g=3046202, c=3046201, q=0)
[183120.856393] Task dump for CPU 0:
[183120.856401] btrfs-balance   R  running task0  8015  2 0x0088
[183120.856407]  8800d8a6f8f8 816c9b6f 81a2b500
880036f4
[183120.856411]  88040d0d5140 8800d8a7 8804094c4620
8804094c4618
[183120.856414]  880036f4 8800d0e8b1a0 8800d8a6f918
816ca177
[183120.856416] Call Trace:
[183120.856428]  [] ? __schedule+0x2af/0x880
[183120.856435]  [] schedule+0x37/0x80
[183120.856441]  [] schedule_timeout+0x201/0x2a0
[183120.856448]  [] ? wake_up_worker+0x24/0x30
[183120.856451]  [] ? insert_work+0x62/0xa0
[183120.856457]  [] ? __set_page_dirty_nobuffers+0xe7/0x140
[183120.856463]  [] ? list_del+0x11/0x40
[183120.856468]  [] wait_for_completion+0x111/0x130
[183120.856474]  [] ? wake_up_q+0x80/0x80
[183120.856522]  []
btrfs_async_run_delayed_refs+0x133/0x150 [btrfs]
[183120.856527]  [] ? __slab_free+0x11f/0x217
[183120.856573]  [] ?
invalidate_extent_cache+0x49/0x1a0 [btrfs]
[183120.856579]  [] ? kmem_cache_alloc+0x1c8/0x1f0
[183120.856615]  [] ? btrfs_drop_snapshot+0x6c/0x850 [btrfs]
[183120.856658]  [] ? __del_reloc_root+0xb9/0xf0 [btrfs]
[183120.856700]  [] ? __del_reloc_root+0x41/0xf0 [btrfs]
[183120.856742]  [] ? __del_reloc_root+0x30/0xf0 [btrfs]
[183120.856783]  [] ? free_reloc_roots+0x25/0x40 [btrfs]
[183120.856825]  [] ? merge_reloc_roots+0x173/0x240 [btrfs]
[183120.856869]  [] ? relocate_block_group+0x265/0x640 [btrfs]
[183120.856912]  [] ?
btrfs_relocate_block_group+0x1c3/0x2d0 [btrfs]
[183120.856957]  [] ?
btrfs_relocate_chunk.isra.39+0x3e/0xc0 [btrfs]
[183120.857001]  [] ? __btrfs_balance+0x49e/0x8e0 [btrfs]
[183120.857046]  [] ? btrfs_balance+0x37d/0x650 [btrfs]
[183120.857090]  [] ? balance_kthread+0x5d/0x80 [btrfs]
[183120.857134]  [] ? btrfs_balance+0x650/0x650 [btrfs]
[183120.857140]  [] ? kthread+0xd8/0xf0
[183120.857146]  [] ? kthread_create_on_node+0x1b0/0x1b0
[183120.857150]  [] ? ret_from_fork+0x3f/0x70
[183120.857155]  [] ? kthread_create_on_node+0x1b0/0x1b0
[183120.882383] Task dump for CPU 0:
[183120.882385] btrfs-balance   R  running task0  8015  2 0x0088
[183120.882387]  880036f4 d292fc58 88041fc03d78
810a636f
[183120.882390]   81a75300 88041fc03d98
810a8c4d
[183120.882392]  0083 0001 88041fc03dc8
810da114
[183120.882394] Call Trace:
[183120.882396][] sched_show_task+0xaf/0x110
[183120.882400]  [] dump_cpu_task+0x3d/0x50
[183120.882404]  [] rcu_dump_cpu_stacks+0x84/0xc0
[183120.882406]  [] rcu_check_callbacks+0x4c2/0x7b0
[183120.882409]  [] ? acct_account_cputime+0x1c/0x20
[183120.882412]  [] ? account_system_time+0x83/0x120
[183120.882414]  [] ? tick_sched_do_timer+0x50/0x50
[183120.882417]  [] update_process_times+0x39/0x60
[183120.882420]  [] tick_sched_handle.isra.17+0x25/0x60
[183120.882422]  [] tick_sched_timer+0x44/0x80
[183120.882425]  [] __hrtimer_run_queues+0xf3/0x220
[183120.882428]  [] hrtimer_interrupt+0xa8/0x1a0
[183120.882430]  [] local_apic_timer_interrupt+0x39/0x60
[183120.882433]  [] smp_apic_timer_interrupt+0x45/0x60
[183120.882436]  [] apic_timer_interrupt+0x6b/0x70
[183120.882437][] ?
__del_reloc_root+0xb9/0xf0 [btrfs]
[183120.882471]  [] ? __del_reloc_root+0x41/0xf0 [btrfs]
[183120.882488]  [] ? __del_reloc_root+0x30/0xf0 [btrfs]
[183120.882505]  [] free_reloc_roots+0x25/0x40 [btrfs]
[183120.882521]  [] merge_reloc_roots+0x173/0x240 [btrfs]
[183120.882539]  [] relocate_block_group+0x265/0x640 [btrfs]
[183120.882556]  []
btrfs_relocate_block_group+0x1c3/0x2d0 [btrfs]
[183120.882574]  []
btrfs_relocate_chunk.isra.39+0x3e/0xc0 [btrfs]
[183120.882591]  [] __btrfs_balance+0x49e/0x8e0 [btrfs]
[183120.882609]  [] 

[PATCH 4/6] btrfs: add comments to barriers before waitqueue_active

2015-10-11 Thread David Sterba
Reduce number of undocumented barriers out there.

Signed-off-by: David Sterba 
---
 fs/btrfs/compression.c  | 3 +++
 fs/btrfs/extent-tree.c  | 3 +--
 fs/btrfs/locking.c  | 3 +++
 fs/btrfs/ordered-data.c | 6 ++
 fs/btrfs/transaction.c  | 3 +++
 5 files changed, 16 insertions(+), 2 deletions(-)

diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c
index 57ee8ca29b06..3a9317ce67f8 100644
--- a/fs/btrfs/compression.c
+++ b/fs/btrfs/compression.c
@@ -839,6 +839,9 @@ static void free_workspace(int type, struct list_head 
*workspace)
btrfs_compress_op[idx]->free_workspace(workspace);
atomic_dec(alloc_workspace);
 wake:
+   /*
+* Make sure counter is updated before we wake up waiters.
+*/
smp_mb();
if (waitqueue_active(workspace_wait))
wake_up(workspace_wait);
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 9f9604201333..59eb92f65c62 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -10363,8 +10363,7 @@ void btrfs_end_write_no_snapshoting(struct btrfs_root 
*root)
 {
percpu_counter_dec(>subv_writers->counter);
/*
-* Make sure counter is updated before we wake up
-* waiters.
+* Make sure counter is updated before we wake up waiters.
 */
smp_mb();
if (waitqueue_active(>subv_writers->wait))
diff --git a/fs/btrfs/locking.c b/fs/btrfs/locking.c
index d7e6baf1b205..03f8630dbaf2 100644
--- a/fs/btrfs/locking.c
+++ b/fs/btrfs/locking.c
@@ -280,6 +280,9 @@ void btrfs_tree_unlock(struct extent_buffer *eb)
if (blockers) {
WARN_ON(atomic_read(>spinning_writers));
atomic_dec(>blocking_writers);
+   /*
+* Make sure counter is updated before we wake up waiters.
+*/
smp_mb();
if (waitqueue_active(>write_lock_wq))
wake_up(>write_lock_wq);
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c
index 52170cf1757e..071005f008c1 100644
--- a/fs/btrfs/ordered-data.c
+++ b/fs/btrfs/ordered-data.c
@@ -345,6 +345,9 @@ int btrfs_dec_test_first_ordered_pending(struct inode 
*inode,
 
if (entry->bytes_left == 0) {
ret = test_and_set_bit(BTRFS_ORDERED_IO_DONE, >flags);
+   /*
+* Implicit memory barrier after test_and_set_bit
+*/
if (waitqueue_active(>wait))
wake_up(>wait);
} else {
@@ -409,6 +412,9 @@ int btrfs_dec_test_ordered_pending(struct inode *inode,
 
if (entry->bytes_left == 0) {
ret = test_and_set_bit(BTRFS_ORDERED_IO_DONE, >flags);
+   /*
+* Implicit memory barrier after test_and_set_bit
+*/
if (waitqueue_active(>wait))
wake_up(>wait);
} else {
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index e8e5b5a10719..3fd70f797b7d 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -861,6 +861,9 @@ static int __btrfs_end_transaction(struct 
btrfs_trans_handle *trans,
atomic_dec(_trans->num_writers);
extwriter_counter_dec(cur_trans, trans->type);
 
+   /*
+* Make sure counter is updated before we wake up waiters.
+*/
smp_mb();
if (waitqueue_active(_trans->writer_wait))
wake_up(_trans->writer_wait);
-- 
2.1.3

--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 1/6] btrfs: remove waitqueue_active check from btrfs_rm_dev_replace_unblocked

2015-10-11 Thread David Sterba
Normally the waitqueue_active would need a barrier, but this is not
necessary here because it's not a performance sensitive context and we
can call wake_up directly.

Suggested-by: Chris Mason 
Signed-off-by: David Sterba 
---
 fs/btrfs/dev-replace.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/fs/btrfs/dev-replace.c b/fs/btrfs/dev-replace.c
index e54dd5905cee..733ff75b620e 100644
--- a/fs/btrfs/dev-replace.c
+++ b/fs/btrfs/dev-replace.c
@@ -454,8 +454,7 @@ static void btrfs_rm_dev_replace_blocked(struct 
btrfs_fs_info *fs_info)
 static void btrfs_rm_dev_replace_unblocked(struct btrfs_fs_info *fs_info)
 {
clear_bit(BTRFS_FS_STATE_DEV_REPLACING, _info->fs_state);
-   if (waitqueue_active(_info->replace_wait))
-   wake_up(_info->replace_wait);
+   wake_up(_info->replace_wait);
 }
 
 static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info,
-- 
2.1.3

--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 3/6] btrfs: comment waitqueue_active implied by locks

2015-10-11 Thread David Sterba
Suggested-by: Chris Mason 
Signed-off-by: David Sterba 
---
 fs/btrfs/raid56.c   | 6 +-
 fs/btrfs/tree-log.c | 6 ++
 2 files changed, 11 insertions(+), 1 deletion(-)

diff --git a/fs/btrfs/raid56.c b/fs/btrfs/raid56.c
index fcf7265ca46f..1a33d3eb36de 100644
--- a/fs/btrfs/raid56.c
+++ b/fs/btrfs/raid56.c
@@ -810,7 +810,11 @@ static noinline void unlock_stripe(struct btrfs_raid_bio 
*rbio)
}
 
goto done_nolock;
-   } else  if (waitqueue_active(>wait)) {
+   /*
+* The barrier for this waitqueue_active is not needed,
+* we're protected by h->lock and can't miss a wakeup.
+*/
+   } else if (waitqueue_active(>wait)) {
spin_unlock(>bio_list_lock);
spin_unlock_irqrestore(>lock, flags);
wake_up(>wait);
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index 1bbaace73383..d0deb4643502 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -2950,6 +2950,9 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
atomic_set(_root_tree->log_commit[index2], 0);
mutex_unlock(_root_tree->log_mutex);
 
+   /*
+* The barrier before waitqueue_active is implied by mutex_unlock
+*/
if (waitqueue_active(_root_tree->log_commit_wait[index2]))
wake_up(_root_tree->log_commit_wait[index2]);
 out:
@@ -2961,6 +2964,9 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
atomic_set(>log_commit[index1], 0);
mutex_unlock(>log_mutex);
 
+   /*
+* The barrier before waitqueue_active is implied by mutex_unlock
+*/
if (waitqueue_active(>log_commit_wait[index1]))
wake_up(>log_commit_wait[index1]);
return ret;
-- 
2.1.3

--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 6/6] btrfs: comment the rest of implicit barriers before waitqueue_active

2015-10-11 Thread David Sterba
There are atomic operations that imply the barrier for waitqueue_active
mixed in an if-condition.

Signed-off-by: David Sterba 
---
 fs/btrfs/delayed-inode.c | 4 
 fs/btrfs/disk-io.c   | 3 +++
 fs/btrfs/inode.c | 3 +++
 fs/btrfs/locking.c   | 9 +
 fs/btrfs/volumes.c   | 3 +++
 5 files changed, 22 insertions(+)

diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c
index a2ae42720a6a..e0941fbb913c 100644
--- a/fs/btrfs/delayed-inode.c
+++ b/fs/btrfs/delayed-inode.c
@@ -463,6 +463,10 @@ static int __btrfs_add_delayed_deletion_item(struct 
btrfs_delayed_node *node,
 static void finish_one_item(struct btrfs_delayed_root *delayed_root)
 {
int seq = atomic_inc_return(_root->items_seq);
+
+   /*
+* atomic_dec_return implies a barrier for waitqueue_active
+*/
if ((atomic_dec_return(_root->items) <
BTRFS_DELAYED_BACKGROUND || seq % BTRFS_DELAYED_BATCH == 0) &&
waitqueue_active(_root->wait))
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 295795aebe0b..379526ffd84d 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -802,6 +802,9 @@ static void run_one_async_done(struct btrfs_work *work)
limit = btrfs_async_submit_limit(fs_info);
limit = limit * 2 / 3;
 
+   /*
+* atomic_dec_return implies a barrier for waitqueue_active
+*/
if (atomic_dec_return(_info->nr_async_submits) < limit &&
waitqueue_active(_info->async_submit_wait))
wake_up(_info->async_submit_wait);
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 611b66d73e80..7be4abe25e06 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -1096,6 +1096,9 @@ static noinline void async_cow_submit(struct btrfs_work 
*work)
nr_pages = (async_cow->end - async_cow->start + PAGE_CACHE_SIZE) >>
PAGE_CACHE_SHIFT;
 
+   /*
+* atomic_sub_return implies a barrier for waitqueue_active
+*/
if (atomic_sub_return(nr_pages, >fs_info->async_delalloc_pages) <
5 * 1024 * 1024 &&
waitqueue_active(>fs_info->async_submit_wait))
diff --git a/fs/btrfs/locking.c b/fs/btrfs/locking.c
index 03f8630dbaf2..8077461fc56a 100644
--- a/fs/btrfs/locking.c
+++ b/fs/btrfs/locking.c
@@ -79,6 +79,9 @@ void btrfs_clear_lock_blocking_rw(struct extent_buffer *eb, 
int rw)
write_lock(>lock);
WARN_ON(atomic_read(>spinning_writers));
atomic_inc(>spinning_writers);
+   /*
+* atomic_dec_and_test implies a barrier for waitqueue_active
+*/
if (atomic_dec_and_test(>blocking_writers) &&
waitqueue_active(>write_lock_wq))
wake_up(>write_lock_wq);
@@ -86,6 +89,9 @@ void btrfs_clear_lock_blocking_rw(struct extent_buffer *eb, 
int rw)
BUG_ON(atomic_read(>blocking_readers) == 0);
read_lock(>lock);
atomic_inc(>spinning_readers);
+   /*
+* atomic_dec_and_test implies a barrier for waitqueue_active
+*/
if (atomic_dec_and_test(>blocking_readers) &&
waitqueue_active(>read_lock_wq))
wake_up(>read_lock_wq);
@@ -229,6 +235,9 @@ void btrfs_tree_read_unlock_blocking(struct extent_buffer 
*eb)
}
btrfs_assert_tree_read_locked(eb);
WARN_ON(atomic_read(>blocking_readers) == 0);
+   /*
+* atomic_dec_and_test implies a barrier for waitqueue_active
+*/
if (atomic_dec_and_test(>blocking_readers) &&
waitqueue_active(>read_lock_wq))
wake_up(>read_lock_wq);
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 6fc735869c18..ff3527192409 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -345,6 +345,9 @@ static noinline void run_scheduled_bios(struct btrfs_device 
*device)
pending = pending->bi_next;
cur->bi_next = NULL;
 
+   /*
+* atomic_dec_return implies a barrier for waitqueue_active
+*/
if (atomic_dec_return(_info->nr_async_bios) < limit &&
waitqueue_active(_info->async_submit_wait))
wake_up(_info->async_submit_wait);
-- 
2.1.3

--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 2/6] btrfs: add barrier for waitqueue_active in clear_btree_io_tree

2015-10-11 Thread David Sterba
waitqueue_active should be preceded by a barrier, in this function we
don't need to call it all the time.

Signed-off-by: David Sterba 
---
 fs/btrfs/transaction.c | 6 ++
 1 file changed, 6 insertions(+)

diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index 74bc3338418b..e8e5b5a10719 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -82,6 +82,12 @@ void btrfs_put_transaction(struct btrfs_transaction 
*transaction)
 static void clear_btree_io_tree(struct extent_io_tree *tree)
 {
spin_lock(>lock);
+   /*
+* Do a single barrier for the waitqueue_active check here, the state
+* of the waitqueue should not change once clear_btree_io_tree is
+* called.
+*/
+   smp_mb();
while (!RB_EMPTY_ROOT(>state)) {
struct rb_node *node;
struct extent_state *state;
-- 
2.1.3

--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PULL][PATCH 0/6] Barriers around waitqueue_active (V2)

2015-10-11 Thread David Sterba
Hi,

I've updated the main patch according to your comments in
https://www.mail-archive.com/linux-btrfs%40vger.kernel.org/msg42551.html

though with one exception, the barrier in btrfs_bio_counter_sub which seems
to be in a performance sensitive context, but I did not find a good way to
determine that dev-replace is running. It's protected by the shared
status of 'mutually_exclusive_operation_running' and we'd have to do some
other checks that would also imply some performance drop. I hope it's ok
to skip that one for now, the remaining new barriers look safe and the rest
is documenting the existing ones.

Please consider pulling this for the 4.4 cycle. Thanks.


The following changes since commit 9ffecb10283508260936b96022d4ee43a7798b4c:

  Linux 4.3-rc3 (2015-09-27 07:50:08 -0400)

are available in the git repository at:

  git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux.git 
fix/waitqueue-barriers

for you to fetch changes up to ee86395458072760d62e66aad10a5e9e8902b8cf:

  btrfs: comment the rest of implicit barriers before waitqueue_active 
(2015-10-10 18:42:00 +0200)


David Sterba (6):
  btrfs: remove waitqueue_active check from btrfs_rm_dev_replace_unblocked
  btrfs: add barrier for waitqueue_active in clear_btree_io_tree
  btrfs: comment waitqueue_active implied by locks
  btrfs: add comments to barriers before waitqueue_active
  btrfs: remove extra barrier before waitqueue_active
  btrfs: comment the rest of implicit barriers before waitqueue_active

 fs/btrfs/compression.c   |  3 +++
 fs/btrfs/delayed-inode.c |  4 
 fs/btrfs/dev-replace.c   |  3 +--
 fs/btrfs/disk-io.c   |  3 +++
 fs/btrfs/extent-tree.c   |  3 +--
 fs/btrfs/inode.c |  3 +++
 fs/btrfs/locking.c   | 12 
 fs/btrfs/ordered-data.c  |  6 ++
 fs/btrfs/raid56.c|  6 +-
 fs/btrfs/transaction.c   |  9 +
 fs/btrfs/tree-log.c  | 14 --
 fs/btrfs/volumes.c   |  3 +++
 12 files changed, 62 insertions(+), 7 deletions(-)
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 5/6] btrfs: remove extra barrier before waitqueue_active

2015-10-11 Thread David Sterba
Removing barriers is scary, but a call to atomic_dec_and_test implies
a barrier, so we don't need to issue another one.

Signed-off-by: David Sterba 
---
 fs/btrfs/tree-log.c | 8 ++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index d0deb4643502..63275594debd 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -229,7 +229,9 @@ int btrfs_pin_log_trans(struct btrfs_root *root)
 void btrfs_end_log_trans(struct btrfs_root *root)
 {
if (atomic_dec_and_test(>log_writers)) {
-   smp_mb();
+   /*
+* Implicit memory barrier after atomic_dec_and_test
+*/
if (waitqueue_active(>log_writer_wait))
wake_up(>log_writer_wait);
}
@@ -2820,7 +2822,9 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
 
mutex_lock(_root_tree->log_mutex);
if (atomic_dec_and_test(_root_tree->log_writers)) {
-   smp_mb();
+   /*
+* Implicit memory barrier after atomic_dec_and_test
+*/
if (waitqueue_active(_root_tree->log_writer_wait))
wake_up(_root_tree->log_writer_wait);
}
-- 
2.1.3

--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


BTRFS with 8TB SMR drives

2015-10-11 Thread Warren Hughes
Hi guys, just added a new Seagate Archive 8TB drive to my BTRFS volume
and I'm getting a tonne of errors when balancing or scrubbing.

A short smartctl test reports fine, running a long one now. Will also
run seatools from a bootable DOS USB while at work today.

Running latest firmware on my 9240-8i which explicitly supports this drive.

I'm finding it very hard to tell if SMR drives are OK with BTRFS
currently - anyone chime in?

Thanks, Warren

[wsh@cloud storcli]$ uname -a
Linux cloud.warrenhughes.net 4.1.10-2-lts #1 SMP Wed Oct 7 21:57:44
CEST 2015 x86_64 GNU/Linux

[wsh@cloud storcli]$ sudo btrfs version
btrfs-progs v4.2.1


[wsh@cloud ~]$ sudo btrfs scrub status /mnt/media
scrub status for 643c3145-8371-4011-8c34-20240e1bbaff
scrub started at Sun Oct 11 20:37:38 2015 and was aborted after 10:35:47
total bytes scrubbed: 8.15TiB with 104218141 errors
error details: read=98736175 csum=5481966
corrected errors: 5484382, uncorrectable errors: 98733759,
unverified errors: 0

[/dev/sdo].write_io_errs   100154203
[/dev/sdo].read_io_errs98735251
[/dev/sdo].flush_io_errs   634
[/dev/sdo].corruption_errs 5481966
[/dev/sdo].generation_errs 0

[wsh@cloud ~]$ sudo smartctl -H -T permissive /dev/sdo
smartctl 6.4 2015-06-04 r4109 [x86_64-linux-4.1.10-2-lts] (local build)
Copyright (C) 2002-15, Bruce Allen, Christian Franke, www.smartmontools.org

Short INQUIRY response, skip product id
=== START OF READ SMART DATA SECTION ===
SMART Health Status: OK

[wsh@cloud storcli]$ sudo ./storcli64 /c0 show
Product Name = LSI MegaRAID SAS 9240-8i
Serial Number = P51010
SAS Address =  500605b004e9d030
PCI Address = 00:03:00:00
System Time = 10/12/2015 07:38:22
Mfg. Date = 03/17/10
Controller Time = 10/12/2015 07:38:20
FW Package Build = 20.13.1-0240
BIOS Version = 4.38.02.2_4.16.08.00_0x06060A05
FW Version = 2.130.404-4659
Driver Name = megaraid_sas
Driver Version = 06.806.08.00-rc1
Vendor Id = 0x1000
Device Id = 0x73
SubVendor Id = 0x1000
SubDevice Id = 0x9240
Host Interface = PCI-E
Device Interface = SAS-6G
Bus Number = 3
Device Number = 0
Function Number = 0
Physical Drives = 7

PD LIST :
===

---
EID:Slt DID State DG Size Intf Med SED PI SeSz ModelSp
---
64:1  4 JBOD  -  1.363 TB SATA HDD N   N  512B WDC WD15EADS-00P8B0  U
64:2  0 JBOD  -  2.728 TB SATA HDD N   N  512B WDC WD30EFRX-68AX9N0 U
64:3  7 JBOD  -  2.728 TB SATA HDD N   N  512B ST3000DM001-1CH166   U
64:4  6 JBOD  -  2.728 TB SATA HDD N   N  512B WDC WD30EFRX-68AX9N0 U
64:5  5 JBOD  -  2.728 TB SATA HDD N   N  512B WDC WD30EFRX-68EUZN0 U
64:6  3 JBOD  -  2.728 TB SATA HDD N   N  512B WDC WD30EFRX-68AX9N0 U
64:7  2 JBOD  -  2.728 TB SATA HDD N   N  512B WDC WD30EFRX-68AX9N0 U
---
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: kernel BUG at /linux/fs/btrfs/extent-tree.c:1833!

2015-10-11 Thread Peter Becker
the output of btrfs check --readonly /dev/sdb

http://pastebin.com/UxkeVd7Y

many entries with "extent buffer leak"


the output of  btrfs-show-super -i0 /dev/sd[bcd] &&  btrfs-show-super
-i1 /dev/sd[bcd] &&  btrfs-show-super -i2 /dev/sd[bcd]

http://pastebin.com/zs7B8827
http://pastebin.com/Kn1kwgYv
http://pastebin.com/CHC52ef7
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: kernel BUG at /linux/fs/btrfs/extent-tree.c:1833!

2015-10-11 Thread Peter Becker
Ok, that's what i expected. :) if it will work :)
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: kernel BUG at /linux/fs/btrfs/extent-tree.c:1833!

2015-10-11 Thread Stéphane Lesimple

Hello Peter,

I have the same problem you have, as reported ~1 month ago on this 
mailing-list.


My setup is 2 disks, and I tried balancing after adding a third one, in a 
raid5 configuration.
I also have some "extent buffer leak" in my btrfsck, but it's hard to say 
if it can be the cause. If you look at the source code you'll see that 
those messages are not printed by the main checking routine but by a 
helper subroutine, I'm not even sure it represents a problem on the 
filesystem (maybe somebody can shed some light here).


I tried with the 4.3-rc2 kernel, and the kernel bug is still there, 
unfortunately. I also posted an ftrace of the bug, hopefully somebody with 
enough btrfs knowledge will have a look.
I reproduced this bug dozens of times, and as far as I can tell I never lost 
any single byte because of the crash, probably thanks to the transaction 
system of btrfs, so, at least there's that.


--
Stéphane.


Le 11 octobre 2015 22:50:07 Peter Becker  a écrit :


the output of btrfs check --readonly /dev/sdb

http://pastebin.com/UxkeVd7Y

many entries with "extent buffer leak"


the output of  btrfs-show-super -i0 /dev/sd[bcd] &&  btrfs-show-super
-i1 /dev/sd[bcd] &&  btrfs-show-super -i2 /dev/sd[bcd]

http://pastebin.com/zs7B8827
http://pastebin.com/Kn1kwgYv
http://pastebin.com/CHC52ef7
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html



--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: BTRFS with 8TB SMR drives

2015-10-11 Thread Kristan
Warren Hughes  warrenhughes.net> writes:

> 
> Hi guys, just added a new Seagate Archive 8TB drive to my BTRFS volume
> and I'm getting a tonne of errors when balancing or scrubbing.
> 
> A short smartctl test reports fine, running a long one now. Will also
> run seatools from a bootable DOS USB while at work today.
> 
> Running latest firmware on my 9240-8i which explicitly supports this 
drive.
> 
> I'm finding it very hard to tell if SMR drives are OK with BTRFS
> currently - anyone chime in?
> 
> Thanks, Warren
> 
> [wsh  cloud storcli]$ uname -a
> Linux cloud.warrenhughes.net 4.1.10-2-lts #1 SMP Wed Oct 7 21:57:44
> CEST 2015 x86_64 GNU/Linux
> 
> [wsh  cloud storcli]$ sudo btrfs version
> btrfs-progs v4.2.1
> 
> [wsh  cloud ~]$ sudo btrfs scrub status /mnt/media
> scrub status for 643c3145-8371-4011-8c34-20240e1bbaff
> scrub started at Sun Oct 11 20:37:38 2015 and was aborted 
after 10:35:47
> total bytes scrubbed: 8.15TiB with 104218141 errors
> error details: read=98736175 csum=5481966
> corrected errors: 5484382, uncorrectable errors: 98733759,
> unverified errors: 0
> 
> [/dev/sdo].write_io_errs   100154203
> [/dev/sdo].read_io_errs98735251
> [/dev/sdo].flush_io_errs   634
> [/dev/sdo].corruption_errs 5481966
> [/dev/sdo].generation_errs 0
> 

hi Warren,

I recently (last week) built a 3 disk RAID 5 array using the same 8TB 
drives which worked fine holding ~12TB then added a 4th disk using a 
JMicron PCI SATA controller. I then ran a balance which failed after 
just over 1TB written to the 4th disk. This caused the entire array to 
fail but the main difference to your scenario was that the 4th disk also 
wasn't reporting to SMART properly. 
I then moved all 4 disks onto the motherboard based SATA controller, 
built the array fresh and have copied ~18TB onto it and it seems to be 
working fine. Perhaps I should try a scrub and see :)

I'm using Centos 7.1 but kernel 4.2.1-ml and btrfs-progs 4.2.2
Kristan

--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: BTRFS with 8TB SMR drives

2015-10-11 Thread Warren Hughes
Thanks Kristan, a scrub would be great; mine appeared to be working
fine until the scrub (although I hadn't yet run a balance on it so who
knows).

I might move my 8TB onto the motherboard controller and see if the
situation improves. Will update here tonight.

Cheers, W.

On 12 October 2015 at 11:53, Kristan  wrote:
> Warren Hughes  warrenhughes.net> writes:
>
>>
>> Hi guys, just added a new Seagate Archive 8TB drive to my BTRFS volume
>> and I'm getting a tonne of errors when balancing or scrubbing.
>>
>> A short smartctl test reports fine, running a long one now. Will also
>> run seatools from a bootable DOS USB while at work today.
>>
>> Running latest firmware on my 9240-8i which explicitly supports this
> drive.
>>
>> I'm finding it very hard to tell if SMR drives are OK with BTRFS
>> currently - anyone chime in?
>>
>> Thanks, Warren
>>
>> [wsh  cloud storcli]$ uname -a
>> Linux cloud.warrenhughes.net 4.1.10-2-lts #1 SMP Wed Oct 7 21:57:44
>> CEST 2015 x86_64 GNU/Linux
>>
>> [wsh  cloud storcli]$ sudo btrfs version
>> btrfs-progs v4.2.1
>>
>> [wsh  cloud ~]$ sudo btrfs scrub status /mnt/media
>> scrub status for 643c3145-8371-4011-8c34-20240e1bbaff
>> scrub started at Sun Oct 11 20:37:38 2015 and was aborted
> after 10:35:47
>> total bytes scrubbed: 8.15TiB with 104218141 errors
>> error details: read=98736175 csum=5481966
>> corrected errors: 5484382, uncorrectable errors: 98733759,
>> unverified errors: 0
>>
>> [/dev/sdo].write_io_errs   100154203
>> [/dev/sdo].read_io_errs98735251
>> [/dev/sdo].flush_io_errs   634
>> [/dev/sdo].corruption_errs 5481966
>> [/dev/sdo].generation_errs 0
>>
>
> hi Warren,
>
> I recently (last week) built a 3 disk RAID 5 array using the same 8TB
> drives which worked fine holding ~12TB then added a 4th disk using a
> JMicron PCI SATA controller. I then ran a balance which failed after
> just over 1TB written to the 4th disk. This caused the entire array to
> fail but the main difference to your scenario was that the 4th disk also
> wasn't reporting to SMART properly.
> I then moved all 4 disks onto the motherboard based SATA controller,
> built the array fresh and have copied ~18TB onto it and it seems to be
> working fine. Perhaps I should try a scrub and see :)
>
> I'm using Centos 7.1 but kernel 4.2.1-ml and btrfs-progs 4.2.2
> Kristan
>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
> the body of a message to majord...@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html



-- 
Warren Hughes
+64 21 633324
IM: gtalk + msn: this email address, skype: akawsh
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] btrfs: remove empty header file extent-tree.h

2015-10-11 Thread Qu Wenruo

Ping?

Any comment?

Thanks,
Qu

Qu Wenruo wrote on 2015/09/29 09:51 +0800:

Hi Chris,

Would you please merge this patch?

The empty header is introduced by my qgroup accounting rework, and the
cleanup patch is missed in 4.2.

Thanks,
Qu

Qu Wenruo wrote on 2015/07/03 09:17 +0800:

The empty file was introduced by a careless 'git add', remove it.

Reported-by: David Sterba 
Signed-off-by: Qu Wenruo 
---
  fs/btrfs/extent-tree.h | 0
  1 file changed, 0 insertions(+), 0 deletions(-)
  delete mode 100644 fs/btrfs/extent-tree.h

diff --git a/fs/btrfs/extent-tree.h b/fs/btrfs/extent-tree.h
deleted file mode 100644
index e69de29..000


--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: BTRFS with 8TB SMR drives

2015-10-11 Thread Warren Hughes
Hopefully this is of use - it's a beast; 34MB when uncompressed

https://drive.google.com/file/d/0B74Kimpwe3nYYUZ2YTMtQXB4V1U/view?usp=sharing

On 12 October 2015 at 14:43, Chris Murphy  wrote:
> Is it possible to get a complete dmesg included in the thread, or if
> it's too big attach it to a bug report? I'm curious if there are any
> libata messages, as well as the specific Btrfs messages.
>
>
> ---
> Chris Murphy



-- 
Warren Hughes
+64 21 633324
IM: gtalk + msn: this email address, skype: akawsh
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: BTRFS with 8TB SMR drives

2015-10-11 Thread Warren Hughes
more info for anyone interested:

[wsh@cloud ~]$ sudo btrfs fi df /mnt/media
Data, RAID1: total=13.64TiB, used=13.61TiB
System, RAID1: total=32.00MiB, used=2.22MiB
Metadata, RAID1: total=16.00GiB, used=15.10GiB
GlobalReserve, single: total=512.00MiB, used=0.00B


[wsh@cloud ~]$ sudo btrfs fi sh /mnt/media
Label: none  uuid: 643c3145-8371-4011-8c34-20240e1bbaff
Total devices 11 FS bytes used 13.63TiB
devid8 size 2.73TiB used 2.54TiB path /dev/sdh
devid9 size 2.73TiB used 2.54TiB path /dev/sdc
devid   10 size 2.73TiB used 2.54TiB path /dev/sdf
devid   11 size 1.82TiB used 1.63TiB path /dev/sdn
devid   12 size 2.73TiB used 2.54TiB path /dev/sdg
devid   14 size 2.73TiB used 2.54TiB path /dev/sda
devid   15 size 2.73TiB used 2.54TiB path /dev/sdd
devid   16 size 2.73TiB used 2.54TiB path /dev/sdk
devid   17 size 2.73TiB used 2.54TiB path /dev/sdl
devid   18 size 3.64TiB used 3.45TiB path /dev/sdm
devid   19 size 7.28TiB used 1.93TiB path /dev/sdo

btrfs-progs v4.2.1

On 12 October 2015 at 14:43, Chris Murphy  wrote:
> Is it possible to get a complete dmesg included in the thread, or if
> it's too big attach it to a bug report? I'm curious if there are any
> libata messages, as well as the specific Btrfs messages.
>
>
> ---
> Chris Murphy



-- 
Warren Hughes
+64 21 633324
IM: gtalk + msn: this email address, skype: akawsh
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: BTRFS with 8TB SMR drives

2015-10-11 Thread Chris Murphy
Is it possible to get a complete dmesg included in the thread, or if
it's too big attach it to a bug report? I'm curious if there are any
libata messages, as well as the specific Btrfs messages.


---
Chris Murphy
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Questions about FIEMAP

2015-10-11 Thread Wang, Zhiye
Hello everyone,

After googling a bit, I learned that btrfs supports FIEMAP (as "cp" 
needs it), but that it's not valid for "write" operations.

I guess we cannot write to the block device directly after getting the block list using 
FIEMAP. This is because:

1. COW feature of btrfs (but this can be disabled using NOCOW)
2. File system rebalance
3. Defragmentation

Aren't items #2 and #3 also a problem for "read" operations? For example, after 
"cp" gets the block list using FIEMAP, a file system rebalance occurs, so the previous 
result of FIEMAP is not valid anymore.

Or maybe I misunderstood something. Please correct me.



Thanks
Mike
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: filesystem goes ro trying to balance. "cpu stuck"

2015-10-11 Thread Duncan
Donald Pearson posted on Sun, 11 Oct 2015 11:46:14 -0500 as excerpted:

> Kernel 4.2.2-1.el7.elrepo btrfs-progs v4.2.1
> 
> I'm attempting to convert a filesystem from raid6 to raid10.  I didn't
> have any functional problems with it, but performance is abysmal
> compared to basically the same arrangement in raid10 so I thought I'd
> just get away from raid56 for a while (I also saw something about parity
> raid code developed beyond 2-disk parity that was ignored/thrown away so
> I'm thinking the devs don't care much about parity raid at least
> for now).

Note on the parity-raid story:  AFAIK at least the btrfs folks aren't 
ignoring it (I don't know about the mdraid/dmraid folks).  There's simply 
more opportunities for new features than there are coders to code them 
up, and while progress is indeed occurring, some of these features may 
well take years.

Consider, even standard raid56 support was originally planned for IIRC 
3.5, but it wasn't actually added until (IIRC) 3.9, and that was only 
partial/runtime support (the parities were being calculated and written, 
but the tools to rebuild from parity were incomplete/broken/non-existent, 
so it was effectively a slow raid0 in terms of reliability, that would be 
upgraded to raid56 "for free" once the tools were done).  Complete raid56 
support wasn't even nominally there until 3.19, with the initial bugs 
still being worked out thru 4.0 and into 4.1.  So it took about /three/ 
/years/ longer than initially planned.

This sort of longer-to-implement-than-planned pattern has repeated 
multiple times over the life of btrfs, which is why it's taking so long 
to mature and stabilize.

So it's not that multi-parity-raid is being rejected or ignored, it's 
simply that there's way more to do than people to do it, and btrfs as a 
cow-based filesystem isn't exactly the simplest thing to implement 
correctly, so initial plans turned out to be /wildly/ optimistic, and 
honestly, some of these features, while not rejected, could well be a 
decade out.  Obviously others will be implemented before then, but 
there's just so many, and so few devs working on what really is a complex 
project, so something ends up being shoved back to that decade out, and 
that's the way it's going to be unless btrfs suddenly gets way more 
developer resources working on it than it has now.

> Partway through the balance something goes wrong and filesystem is
> forced read-only stopping the balance.
> 
> I did a fschk and it didn't complain about/find any errors.  The drives
> aren't throwing any errors or incrementing any smart attributes.  This
> is a backup array, so it's not the end of the world if I have to just
> blow it away and rebuild as raid10 from scratch.
> 
> The console prints this error.
> NMI watchdog: BUG: soft lockup - CPU#0 stuck for 22s!
> [btrfs-balance:8015]

I'm a user not a dev, tho I am a regular on this list, and backtraces 
don't mean a lot to me, so take this FWIW...

1) How old is the filesystem?  It isn't quite new, created with 
mkfs.btrfs from btrfs-progs v4.2.0 or v4.2.1, is it?  There's a known 
mkfs.btrfs bug along in there, that I don't remember whether it's fixed 
by 4.2.1 or only the latest 4.2.2, but it creates invalid filesystems.  
Btrfs check from 4.2.2 can detect the problem, but can't fix it, and as 
the filesystems as they are are unstable, it's best to get what you need 
off of them and recreate them with a non-buggy mkfs.btrfs ASAP.

2) Since you're on progs v4.2.1 ATM, that may apply to its mkfs.btrfs as 
well.  Please upgrade to 4.2.2 before creating any further btrfs, or 
failing that, downgrade to 4.1.3 or whatever the last in the progs 4.1 
series was.

3) Are you running btrfs quotas on the filesystem?  Unfortunately, btrfs 
quota handling code remains an unstable sore spot, tho they're continuing 
to work hard on fixing it.  I'm continuing to recommend, as I have for 
some time now, that people don't use it unless they're willing to deal 
with the problems and are actively working with the devs to fix them.  
Otherwise, either they need quota support and should really choose a 
filesystem where the feature is mature and stable, or they don't, in 
which case just leaving it off (or turning it off if on) avoids the 
problem.

There's at least two confirmed reasonably recent cases where turning off 
btrfs quota support eliminated the issues people were reporting, so this 
isn't an idle recommendation, it really does help in at least some 
cases.  If you don't really need quotas, leave (or turn) them off.  If 
you do, you really should be using a filesystem where the quota feature 
is mature and stable enough to rely on.  Yes, it does make a difference.

4) Snapshots (scaling).  While snapshots are a reasonably mature feature, 
they do remain a scaling challenge.  My recommendation is that you try to 
keep to about 250-ish snapshots per subvolume, no more than 3000 
snapshots worst-case total, and better no more than 1000 or 

[PATCH] btrfs-progs: Add all missing close_ctree and btrfs_close_all_devices

2015-10-11 Thread Zhao Lei
This patch adds all missing close_ctree and btrfs_close_all_devices
calls to several tools in btrfs progs, to avoid memory leaks.

Signed-off-by: Zhao Lei 
---
 btrfs-calc-size.c|  1 +
 btrfs-debug-tree.c   |  5 -
 btrfs-find-root.c|  1 +
 btrfs-map-logical.c  |  1 +
 btrfs-select-super.c |  3 +++
 btrfstune.c  |  1 +
 cmds-filesystem.c| 14 --
 cmds-property.c  |  2 ++
 cmds-rescue.c|  5 -
 cmds-restore.c   |  1 +
 cmds-scrub.c |  4 +++-
 11 files changed, 29 insertions(+), 9 deletions(-)

diff --git a/btrfs-calc-size.c b/btrfs-calc-size.c
index 7287858..b756693 100644
--- a/btrfs-calc-size.c
+++ b/btrfs-calc-size.c
@@ -508,5 +508,6 @@ int main(int argc, char **argv)
 out:
close_ctree(root);
free(roots);
+   btrfs_close_all_devices();
return ret;
 }
diff --git a/btrfs-debug-tree.c b/btrfs-debug-tree.c
index 7d8e876..8adc39f 100644
--- a/btrfs-debug-tree.c
+++ b/btrfs-debug-tree.c
@@ -28,6 +28,7 @@
 #include "disk-io.h"
 #include "print-tree.h"
 #include "transaction.h"
+#include "volumes.h"
 #include "utils.h"
 
 static int print_usage(int ret)
@@ -428,5 +429,7 @@ no_node:
printf("uuid %s\n", uuidbuf);
printf("%s\n", PACKAGE_STRING);
 close_root:
-   return close_ctree(root);
+   ret = close_ctree(root);
+   btrfs_close_all_devices();
+   return ret;
 }
diff --git a/btrfs-find-root.c b/btrfs-find-root.c
index 01b3603..fc3812c 100644
--- a/btrfs-find-root.c
+++ b/btrfs-find-root.c
@@ -216,5 +216,6 @@ int main(int argc, char **argv)
 out:
btrfs_find_root_free();
close_ctree(root);
+   btrfs_close_all_devices();
return ret;
 }
diff --git a/btrfs-map-logical.c b/btrfs-map-logical.c
index d9fa6b2..deccb03 100644
--- a/btrfs-map-logical.c
+++ b/btrfs-map-logical.c
@@ -357,6 +357,7 @@ out_close_fd:
 close:
free(output_file);
close_ctree(root);
+   btrfs_close_all_devices();
if (ret < 0)
ret = 1;
return ret;
diff --git a/btrfs-select-super.c b/btrfs-select-super.c
index b790f3e..df74153 100644
--- a/btrfs-select-super.c
+++ b/btrfs-select-super.c
@@ -23,6 +23,7 @@
 #include 
 #include "kerncompat.h"
 #include "ctree.h"
+#include "volumes.h"
 #include "disk-io.h"
 #include "print-tree.h"
 #include "transaction.h"
@@ -101,5 +102,7 @@ int main(int ac, char **av)
 */
printf("using SB copy %llu, bytenr %llu\n", (unsigned long long)num,
   (unsigned long long)bytenr);
+   close_ctree(root);
+   btrfs_close_all_devices();
return ret;
 }
diff --git a/btrfstune.c b/btrfstune.c
index c248ee6..0907aa9 100644
--- a/btrfstune.c
+++ b/btrfstune.c
@@ -548,6 +548,7 @@ int main(int argc, char *argv[])
}
 out:
close_ctree(root);
+   btrfs_close_all_devices();
 
return ret;
 }
diff --git a/cmds-filesystem.c b/cmds-filesystem.c
index 3663734..a14cb85 100644
--- a/cmds-filesystem.c
+++ b/cmds-filesystem.c
@@ -918,6 +918,7 @@ devs_only:
}
 out:
printf("%s\n", PACKAGE_STRING);
+   btrfs_close_all_devices();
free_seen_fsid();
return ret;
 }
@@ -1279,21 +1280,22 @@ static const char * const cmd_filesystem_label_usage[] 
= {
 
 static int cmd_filesystem_label(int argc, char **argv)
 {
+   int ret;
+
if (check_argc_min(argc, 2) || check_argc_max(argc, 3))
usage(cmd_filesystem_label_usage);
 
if (argc > 2) {
-   return set_label(argv[1], argv[2]);
+   ret = set_label(argv[1], argv[2]);
} else {
char label[BTRFS_LABEL_SIZE];
-   int ret;
-
ret = get_label(argv[1], label);
if (!ret)
-   fprintf(stdout, "%s\n", label);
-
-   return ret;
+   printf("%s\n", label);
}
+
+   btrfs_close_all_devices();
+   return ret;
 }
 
 static const char filesystem_cmd_group_info[] =
diff --git a/cmds-property.c b/cmds-property.c
index 0ffd250..f8c2823 100644
--- a/cmds-property.c
+++ b/cmds-property.c
@@ -22,6 +22,7 @@
 #include 
 #include 
 
+#include "volumes.h"
 #include "commands.h"
 #include "props.h"
 #include "ctree.h"
@@ -319,6 +320,7 @@ static int setget_prop(int types, const char *object,
else
ret = 0;
 
+   btrfs_close_all_devices();
 out:
return ret;
 
diff --git a/cmds-rescue.c b/cmds-rescue.c
index fb3227b..4885b8a 100644
--- a/cmds-rescue.c
+++ b/cmds-rescue.c
@@ -19,6 +19,7 @@
 #include "kerncompat.h"
 
 #include 
+#include "volumes.h"
 #include "ctree.h"
 #include "transaction.h"
 #include "disk-io.h"
@@ -101,6 +102,7 @@ static int cmd_rescue_chunk_recover(int argc, char *argv[])
} else {
fprintf(stdout, "Fail to recover the chunk tree.\n");
}
+   btrfs_close_all_devices();
return ret;
 }
 
@@ -149,6 +151,7 @@ static int cmd_rescue_super_recover(int