Re: kernel BUG at /linux/fs/btrfs/extent-tree.c:1833!
dmesg: [ 3000.613310] INFO: task btrfs-transacti:11423 blocked for more than 120 seconds. [ 3000.613314] Tainted: G U 4.2.3-040203-generic #201510030832 [ 3000.613314] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. [ 3000.613315] btrfs-transacti D 0 11423 2 0x [ 3000.613318] 88007bd2bb58 0046 880215d7e600 8802121d2640 [ 3000.613319] 0246 88007bd2c000 8801aa607928 8801aa607940 [ 3000.613321] 88007bd2bba8 8801aa607920 88007bd2bb78 817a6357 [ 3000.613322] Call Trace: [ 3000.613327] [] schedule+0x37/0x80 [ 3000.613341] [] btrfs_tree_lock+0x98/0x1c0 [btrfs] [ 3000.613344] [] ? prepare_to_wait_event+0xf0/0xf0 [ 3000.613349] [] btrfs_search_slot+0x6e5/0x9c0 [btrfs] [ 3000.613351] [] ? __set_page_dirty_nobuffers+0xe7/0x140 [ 3000.613356] [] btrfs_lookup_inode+0x2f/0xa0 [btrfs] [ 3000.613364] [] ? btrfs_delete_delayed_items+0x313/0x330 [btrfs] [ 3000.613370] [] ? btrfs_insert_delayed_items+0xa4/0x420 [btrfs] [ 3000.613376] [] __btrfs_update_delayed_inode+0x5a/0x1e0 [btrfs] [ 3000.613382] [] __btrfs_run_delayed_items+0x17c/0x210 [btrfs] [ 3000.613388] [] btrfs_run_delayed_items+0x13/0x20 [btrfs] [ 3000.613395] [] btrfs_commit_transaction+0x2c8/0xb10 [btrfs] [ 3000.613400] [] ? start_transaction+0x93/0x580 [btrfs] [ 3000.613406] [] transaction_kthread+0x1ba/0x240 [btrfs] [ 3000.613411] [] ? btrfs_cleanup_transaction+0x540/0x540 [btrfs] [ 3000.613412] [] kthread+0xc9/0xe0 [ 3000.613414] [] ? kthread_create_on_node+0x180/0x180 [ 3000.613415] [] ret_from_fork+0x3f/0x70 [ 3000.613416] [] ? kthread_create_on_node+0x180/0x180 [ 3120.613665] INFO: task btrfs-transacti:11423 blocked for more than 120 seconds. [ 3120.613669] Tainted: G U 4.2.3-040203-generic #201510030832 [ 3120.613669] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. 
[ 3120.613670] btrfs-transacti D 0 11423 2 0x [ 3120.613672] 88007bd2bb58 0046 880215d7e600 8802121d2640 [ 3120.613674] 0246 88007bd2c000 8801aa607928 8801aa607940 [ 3120.613675] 88007bd2bba8 8801aa607920 88007bd2bb78 817a6357 [ 3120.613676] Call Trace: [ 3120.613682] [] schedule+0x37/0x80 [ 3120.613695] [] btrfs_tree_lock+0x98/0x1c0 [btrfs] [ 3120.613698] [] ? prepare_to_wait_event+0xf0/0xf0 [ 3120.613702] [] btrfs_search_slot+0x6e5/0x9c0 [btrfs] [ 3120.613704] [] ? __set_page_dirty_nobuffers+0xe7/0x140 [ 3120.613709] [] btrfs_lookup_inode+0x2f/0xa0 [btrfs] [ 3120.613716] [] ? btrfs_delete_delayed_items+0x313/0x330 [btrfs] [ 3120.613721] [] ? btrfs_insert_delayed_items+0xa4/0x420 [btrfs] [ 3120.613727] [] __btrfs_update_delayed_inode+0x5a/0x1e0 [btrfs] [ 3120.613732] [] __btrfs_run_delayed_items+0x17c/0x210 [btrfs] [ 3120.613744] [] btrfs_run_delayed_items+0x13/0x20 [btrfs] [ 3120.613749] [] btrfs_commit_transaction+0x2c8/0xb10 [btrfs] [ 3120.613754] [] ? start_transaction+0x93/0x580 [btrfs] [ 3120.613759] [] transaction_kthread+0x1ba/0x240 [btrfs] [ 3120.613764] [] ? btrfs_cleanup_transaction+0x540/0x540 [btrfs] [ 3120.613766] [] kthread+0xc9/0xe0 [ 3120.613767] [] ? kthread_create_on_node+0x180/0x180 [ 3120.613768] [] ret_from_fork+0x3f/0x70 [ 3120.613769] [] ? kthread_create_on_node+0x180/0x180 [ 3240.614145] INFO: task btrfs-transacti:11423 blocked for more than 120 seconds. [ 3240.614148] Tainted: G U 4.2.3-040203-generic #201510030832 [ 3240.614149] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. [ 3240.614150] btrfs-transacti D 0 11423 2 0x [ 3240.614152] 88007bd2bb58 0046 880215d7e600 8802121d2640 [ 3240.614154] 0246 88007bd2c000 8801aa607928 8801aa607940 [ 3240.614155] 88007bd2bba8 8801aa607920 88007bd2bb78 817a6357 [ 3240.614156] Call Trace: [ 3240.614161] [] schedule+0x37/0x80 [ 3240.614174] [] btrfs_tree_lock+0x98/0x1c0 [btrfs] [ 3240.614177] [] ? 
prepare_to_wait_event+0xf0/0xf0 [ 3240.614181] [] btrfs_search_slot+0x6e5/0x9c0 [btrfs] [ 3240.614183] [] ? __set_page_dirty_nobuffers+0xe7/0x140 [ 3240.614188] [] btrfs_lookup_inode+0x2f/0xa0 [btrfs] [ 3240.614194] [] ? btrfs_delete_delayed_items+0x313/0x330 [btrfs] [ 3240.614200] [] ? btrfs_insert_delayed_items+0xa4/0x420 [btrfs] [ 3240.614206] [] __btrfs_update_delayed_inode+0x5a/0x1e0 [btrfs] [ 3240.614211] [] __btrfs_run_delayed_items+0x17c/0x210 [btrfs] [ 3240.614217] [] btrfs_run_delayed_items+0x13/0x20 [btrfs] [ 3240.614222] [] btrfs_commit_transaction+0x2c8/0xb10 [btrfs] [ 3240.614227] [] ? start_transaction+0x93/0x580 [btrfs] [ 3240.614232] [] transaction_kthread+0x1ba/0x240 [btrfs] [ 3240.614236] [] ? btrfs_cleanup_transaction+0x540/0x540 [btrfs] [ 3240.614238] [] kthread+0xc9/0xe0 [ 3240.614239] [] ?
Re: kernel BUG at /linux/fs/btrfs/extent-tree.c:1833!
I also get some of this syslog entrys: [ 3840.616538] INFO: task btrfs-transacti:11423 blocked for more than 120 seconds. [ 3840.616541] Tainted: G U 4.2.3-040203-generic #201510030832 [ 3840.616542] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. [ 3840.616543] btrfs-transacti D 0 11423 2 0x [ 3840.616545] 88007bd2bb58 0046 880215d7e600 8802121d2640 [ 3840.616547] 0246 88007bd2c000 8801aa607928 8801aa607940 [ 3840.616548] 88007bd2bba8 8801aa607920 88007bd2bb78 817a6357 [ 3840.616549] Call Trace: [ 3840.616555] [] schedule+0x37/0x80 [ 3840.616569] [] btrfs_tree_lock+0x98/0x1c0 [btrfs] [ 3840.616572] [] ? prepare_to_wait_event+0xf0/0xf0 [ 3840.616576] [] btrfs_search_slot+0x6e5/0x9c0 [btrfs] [ 3840.616579] [] ? __set_page_dirty_nobuffers+0xe7/0x140 [ 3840.616584] [] btrfs_lookup_inode+0x2f/0xa0 [btrfs] [ 3840.616592] [] ? btrfs_delete_delayed_items+0x313/0x330 [btrfs] [ 3840.616598] [] ? btrfs_insert_delayed_items+0xa4/0x420 [btrfs] [ 3840.616604] [] __btrfs_update_delayed_inode+0x5a/0x1e0 [btrfs] [ 3840.616609] [] __btrfs_run_delayed_items+0x17c/0x210 [btrfs] [ 3840.616615] [] btrfs_run_delayed_items+0x13/0x20 [btrfs] [ 3840.616621] [] btrfs_commit_transaction+0x2c8/0xb10 [btrfs] [ 3840.616626] [] ? start_transaction+0x93/0x580 [btrfs] [ 3840.616631] [] transaction_kthread+0x1ba/0x240 [btrfs] [ 3840.616636] [] ? btrfs_cleanup_transaction+0x540/0x540 [btrfs] [ 3840.616637] [] kthread+0xc9/0xe0 [ 3840.616638] [] ? kthread_create_on_node+0x180/0x180 [ 3840.616640] [] ret_from_fork+0x3f/0x70 [ 3840.616641] [] ? kthread_create_on_node+0x180/0x180 Is this helpfull? The filesystem is usable but i need the new space. 2015-10-10 21:48 GMT+02:00 Peter Becker: > btrfs balance start -m /media/RAID > > complete with out any error but the resulte of device usage is confusing me. > Metadata on sdb and sdc are 2 GiB, but on sdd (the new added device) > is 4 GiB. And the 2. 
one that's confusing me, is that sdd has a > "System" entry but sdb and sdc dosn't > > floyd@nas ~ $ sudo btrfs dev us /media/RAID/ > /dev/sdb, ID: 1 >Device size: 2.73TiB >Data,RAID1: 2.11TiB >Metadata,RAID1: 2.00GiB >System,RAID1: 32.00MiB >Unallocated: 628.49GiB > > /dev/sdc, ID: 2 >Device size: 2.73TiB >Data,RAID1: 2.11TiB >Metadata,RAID1: 2.00GiB >Unallocated: 628.52GiB > > /dev/sdd, ID: 3 >Device size: 2.73TiB >Data,RAID1:792.00GiB >Metadata,RAID1: 4.00GiB >System,RAID1: 32.00MiB >Unallocated: 1.95TiB > > 2015-10-10 21:23 GMT+02:00 Peter Becker : >> Hi Henk, >> >> i have try it with kernel 4.1.6 and 4.2.3; btrfs progs 4.2.1 and 4.2.2 >> .. the same error. >> System freeze after 70% of balancing. >> >> Scrub complete without error. >> >> has someone a hint what i can do now? >> >> 2015-10-09 15:52 GMT+02:00 Henk Slager : >>> Hi Peter, >>> >>> I would try to add the mount option skip_balance for your raid1 >>> pool first, then see if you can use your system as you normally would. >>> I assume you can live without explicit (re-)balance for some time, >>> i.e. that the original disks are not too full. >>> >>> I recently did also some disks add/remove and also raid profile >>> convert and found out that kernel 4.2.x did crash my system with >>> various kernel bugs. So I switched back to 4.1.6 and although other >>> bugs hit me (see https://bugzilla.kernel.org/show_bug.cgi?id=104371 ) >>> the actions I wanted did complete. >>> >>> Using "btrfs check --repair" has never resulted in succes for me (for >>> some root filesystems (single profiles for s m d) on real and virual >>> machines), so I would only use that once you have your files backed up >>> on some other (cloned) filesystem. >>> >>> /Henk >>> >>> On Fri, Oct 9, 2015 at 9:41 AM, Peter Becker wrote: At first i add a new device to my btrfs raid1 pool and start balance. After ~5 hours, balanace hangs and cpu-usage goes to 100% (kworker/u4 use all cpu-power). What should i do now? 
Run "btrfs check --repair" on all devices? Kernel: 4.2.3-040203-generic Btrfs progs v4.2.1 Full Syslog: https://bugzilla.kernel.org/show_bug.cgi?id=105681 From Syslog: [16880.495586] kernel BUG at /home/kernel/COD/linux/fs/btrfs/extent-tree.c:1833! [16880.495603] invalid opcode: [#1] SMP [16880.495614] Modules linked in: xt_nat veth xt_conntrack xt_addrtype br_netfilter nvram dm_thin_pool dm_persistent_data msr dm_bio_prison dm_bufio libcrc32c ir_lirc_codec ir_xmp_decoder lirc_dev ir_mce_kbd_decoder ir_sharp_decoder ir_sony_decoder ir_sanyo_decoder
Re: kernel BUG at /linux/fs/btrfs/extent-tree.c:1833!
Peter Becker posted on Sat, 10 Oct 2015 21:48:31 +0200 as excerpted: > btrfs balance start -m /media/RAID > > complete with out any error but the resulte of device usage is confusing > me. > Metadata on sdb and sdc are 2 GiB, but on sdd (the new added device) > is 4 GiB. And the 2. one that's confusing me, is that sdd has a "System" > entry but sdb and sdc dosn't > > floyd@nas ~ $ sudo btrfs dev us /media/RAID/ > /dev/sdb, ID: 1 >Device size: 2.73TiB >Data,RAID1: 2.11TiB >Metadata,RAID1: 2.00GiB >System,RAID1: 32.00MiB >Unallocated: 628.49GiB > > /dev/sdc, ID: 2 >Device size: 2.73TiB >Data,RAID1: 2.11TiB >Metadata,RAID1: 2.00GiB >Unallocated: 628.52GiB > > /dev/sdd, ID: 3 >Device size: 2.73TiB >Data,RAID1:792.00GiB >Metadata,RAID1: 4.00GiB >System,RAID1: 32.00MiB >Unallocated: 1.95TiB FWIW, there's also btrfs fi usage, which prints a somewhat different layout of pretty much the same statistics. It may be useful to compare output styles and choose the one you prefer. I prefer fi usage to dev usage in most cases, but YMMV. The key thing to remember about btrfs raid1 on more than two devices is that it's exactly two copies, not N copies, where N is the number of devices. In a three-device raid1, by definition, for each chunk that will mean one copy each on two devices, with the third device not getting a copy of that particular chunk, since btrfs raid1 is exactly two copies, no more, no less. So system is raid1, and sdb and sdd each have a copy of the (apparently just one) system chunk, one copy each for two copies total, leaving no system chunk to be placed on sdc, which is why it has none. And, given the stats, there are 4 GiB of raid1 metadata chunks comprising two copies of 2 GiB worth of metadata. Half that metadata has a copy each on sdb and sdd, while the other half has a copy each on sdc and sdd. IOW, sdd has a copy of all metadata, but sdb and sdc only have a copy of half the metadata each. 
Since the chunk allocator creates new chunks on the device with the most available space, subject to the restriction that for raid1, there's two copies and both copies cannot be on the same device, because sdd was recently added and thus the one most empty, when you ran the metadata balance, it created one copy of the raid1 two copies on the new device as it had the most free space, and then had to select one of the other two devices for the other copy. Since the other two devices were basically evenly filled, it alternated, selecting one and then the other, so each one got the second copy of half of the metadata, while the new device with the most free space got the first copy of all metadata as it was rewritten by the balance. -- Duncan - List replies preferred. No HTML msgs. "Every nonfree program has a lord, a master -- and if you use the program, he is your master." Richard Stallman -- To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH v5 8/9] vfs: Add vfs_copy_file_range() support for pagecache copies
On Wed, Sep 30, 2015 at 01:26:52PM -0400, Anna Schumaker wrote: > This allows us to have an in-kernel copy mechanism that avoids frequent > switches between kernel and user space. This is especially useful so > NFSD can support server-side copies. > > I make pagecache copies configurable by adding three new (exclusive) > flags: > - COPY_FR_REFLINK tells vfs_copy_file_range() to only create a reflink. > - COPY_FR_COPY does a full data copy, but may be filesystem accelerated. > - COPY_FR_DEDUP creates a reflink, but only if the contents of both > ranges are identical. All but FR_COPY really should be a separate system call. Clones (and dedup as a special case of clones) are really a separate beast from file copies. If I want to clone a file I either want it cloned fully or fail, not copy a certain amount. That means that a) we need to return an error, not a short "write", and b) locking implementations are important - we need to prevent other applications from racing with our clone even if it is large, while getting these semantics for the possibly short-returning file copy would require a proper userland locking protocol. Last but not least file copies need to be interruptible while clones should not be. All this is already important for local file systems and even more important for NFS exporting. So I'd suggest to drop this patch and just let your syscall handle actual copies with all their horrors. We can go with Peng's patches to generalize the btrfs ioctls for clones for now which is what everyone already uses anyway, and then add a separate sys_file_clone later. -- To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH v5 9/9] btrfs: btrfs_copy_file_range() only supports reflinks
On Wed, Sep 30, 2015 at 01:26:53PM -0400, Anna Schumaker wrote: > Reject copies that don't have the COPY_FR_REFLINK flag set. I think a reflink actually is a perfectly valid copy, and I don't buy the duplicate arguments in earlier threads. We really need to think more in terms of how this impacts a user and not how it's implemented internally. How does a user notice it's a reflink? They don't as implemented in btrfs and co. Now on filesystems that don't always do copy on write but might support reflinks (ocfs2, XFS in the future) this becomes a bit more interesting - the difference here is that we get an implicit fallocate when doing a real copy. But if that's something we have actual requests for that's how we should specify it rather than in terms of arcane implementation details. -- To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: btrfs says no errors, but booting gives lots of errors
Duncan <1i5t5.dun...@cox.net> wrote: > covici posted on Sat, 10 Oct 2015 19:08:16 -0400 as excerpted: > > > cov...@ccs.covici.com wrote: > > > >> Lionel Boutonwrote: > >> > >> > Le 10/10/2015 18:55, cov...@ccs.covici.com a écrit : > >> > > [...] > >> > > But do you folks have any idea about my original question, this > >> > > leads me to think that btrfs is too new or something. > >> > > >> > I've seen a recent report of a problem with btrfs-progs 4.2 confirmed > >> > as a bug in mkfs. As you created the filesystem with it, it could be > >> > the problem. > > > > I do have 4.2.2, I could go to, would that be better? > > btrfs-progs-4.2.2 does indeed have the mkfs.btrfs fixes for the bug in > question. You should be fine remaking the filesystem with it. > > If you created the filesystem with the buggy mkfs.btrfs, AFAIK, current > 4.2.2 btrfs check can detect the error, but can't fix it. Blowing away > the filesystem and recreating is the only known fix at this time, and > filesystems created with the buggy version are not safe and could blow up > at any time, so it's best to be rid of them and onto something more > stable as soon as possible. > > I can't help with the subvolumes bit, however, because while I'm on > gentoo/~amd64 here too, also with systemd... > > I don't use subvolumes, as to me it's simply putting too many eggs in one > filesystem basket. Instead, I prefer multiple separate btrfs > filesystems, each on their own partitions. My / includes most of what > packages install, including /usr and /var but not /var/log. It's 8 GiB > in size, under half used. /home is separate, the repos tree (gentoo and > overlays) along with ccache, binpackages, the kernel tree, etc, are > together on a separate partition, /var/log is separate (and tiny, half a > GiB), etc. 
I keep / mounted read-only by default, so have the parts of / > var/lib that must be runtime-writable symlinked to subdirs of /home/var, > with /home of course mounted writable, but other than that and some /var/ > log/ subdirs, anything that's installed by a package is on /, a lesson I > learned the hard way when I had to recover from backups where /, /usr > and /var were from backups taken on different dates and thus not > synchronized with what portage /thought/ was installed based on /var/db/ > pkg. > > Not saying that's best for you, but it's a solution that I've found works > very well for me, and the relative small 8 GiB size of / makes it easy to > have backup copies of it that I can boot, should my working / take a > dump. But if it's all on the same filesystem, as it is with subvolumes, > and that filesystem takes a dump... it's all gone at once! That's not > something I want to happen, so I vastly prefer the independent > filesystems, but with everything (but the limited exceptions mentioned > above) the package manager deals with on the same one, so it all stays > synced and is backed up as a single unit, which after all remains > reasonably small, 8 GiB, less than half used. Thanks, in the ext4 world, I have lvm and lots of things using separate lvm's. I don't want to go back to partitions, if btrfs is that fragile, maybe I should wait a while yet. Or, I could use lvm and put btrfs on top of that, but it seems strange to me. -- Your life is like a penny. You're going to lose it. The question is: How do you spend it? John Covici cov...@ccs.covici.com -- To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH v5 6/9] vfs: Copy should use file_out rather than file_in
On Wed, Sep 30, 2015 at 01:26:50PM -0400, Anna Schumaker wrote: > The way to think about this is that the destination filesystem reads the > data from the source file and processes it accordingly. This is > especially important to avoid an infinite loop when doing a "server to > server" copy on NFS. And doesn't really matter without those. Either way this looks good enough and should be folded. -- To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH v5 7/9] vfs: Remove copy_file_range mountpoint checks
On Wed, Sep 30, 2015 at 01:26:51PM -0400, Anna Schumaker wrote: > I still want to do an in-kernel copy even if the files are on different > mountpoints, and NFS has a "server to server" copy that expects two > files on different mountpoints. Let's have individual filesystems > implement this check instead. NAK. I think this is a bad idea in general and will only be convinced by a properly audited actual implementation. And even then with a flag where the file system specifically needs to opt into this behavior instead of getting it by default. -- To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH v5 5/9] vfs: Copy shouldn't forbid ranges inside the same file
Needs to be folded. -- To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH v5 4/9] vfs: Copy should check len after file open mode
Should be folded into patch 1. -- To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: btrfs says no errors, but booting gives lots of errors
covici posted on Sat, 10 Oct 2015 19:08:16 -0400 as excerpted: > cov...@ccs.covici.com wrote: > >> Lionel Boutonwrote: >> >> > Le 10/10/2015 18:55, cov...@ccs.covici.com a écrit : >> > > [...] >> > > But do you folks have any idea about my original question, this >> > > leads me to think that btrfs is too new or something. >> > >> > I've seen a recent report of a problem with btrfs-progs 4.2 confirmed >> > as a bug in mkfs. As you created the filesystem with it, it could be >> > the problem. > > I do have 4.2.2, I could go to, would that be better? btrfs-progs-4.2.2 does indeed have the mkfs.btrfs fixes for the bug in question. You should be fine remaking the filesystem with it. If you created the filesystem with the buggy mkfs.btrfs, AFAIK, current 4.2.2 btrfs check can detect the error, but can't fix it. Blowing away the filesystem and recreating is the only known fix at this time, and filesystems created with the buggy version are not safe and could blow up at any time, so it's best to be rid of them and onto something more stable as soon as possible. I can't help with the subvolumes bit, however, because while I'm on gentoo/~amd64 here too, also with systemd... I don't use subvolumes, as to me it's simply putting too many eggs in one filesystem basket. Instead, I prefer multiple separate btrfs filesystems, each on their own partitions. My / includes most of what packages install, including /usr and /var but not /var/log. It's 8 GiB in size, under half used. /home is separate, the repos tree (gentoo and overlays) along with ccache, binpackages, the kernel tree, etc, are together on a separate partition, /var/log is separate (and tiny, half a GiB), etc. 
I keep / mounted read-only by default, so have the parts of / var/lib that must be runtime-writable symlinked to subdirs of /home/var, with /home of course mounted writable, but other than that and some /var/ log/ subdirs, anything that's installed by a package is on /, a lesson I learned the hard way when I had to recover from backups where /, /usr and /var were from backups taken on different dates and thus not synchronized with what portage /thought/ was installed based on /var/db/ pkg. Not saying that's best for you, but it's a solution that I've found works very well for me, and the relative small 8 GiB size of / makes it easy to have backup copies of it that I can boot, should my working / take a dump. But if it's all on the same filesystem, as it is with subvolumes, and that filesystem takes a dump... it's all gone at once! That's not something I want to happen, so I vastly prefer the independent filesystems, but with everything (but the limited exceptions mentioned above) the package manager deals with on the same one, so it all stays synced and is backed up as a single unit, which after all remains reasonably small, 8 GiB, less than half used. -- Duncan - List replies preferred. No HTML msgs. "Every nonfree program has a lord, a master -- and if you use the program, he is your master." Richard Stallman -- To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 1/7] btrfs: introduce _in_rcu variants of message printing functions
Due to the missing variants there are messages that lack the information printed by btrfs_info etc helpers. Signed-off-by: David Sterba--- fs/btrfs/ctree.h | 29 + 1 file changed, 29 insertions(+) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 938efe33be80..88acdffbe384 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -4039,14 +4039,43 @@ void btrfs_printk(const struct btrfs_fs_info *fs_info, const char *fmt, ...) #define btrfs_info(fs_info, fmt, args...) \ btrfs_printk(fs_info, KERN_INFO fmt, ##args) +/* + * Wrappers that use printk_in_rcu + */ +#define btrfs_emerg_in_rcu(fs_info, fmt, args...) \ + btrfs_printk_in_rcu(fs_info, KERN_EMERG fmt, ##args) +#define btrfs_alert_in_rcu(fs_info, fmt, args...) \ + btrfs_printk_in_rcu(fs_info, KERN_ALERT fmt, ##args) +#define btrfs_crit_in_rcu(fs_info, fmt, args...) \ + btrfs_printk_in_rcu(fs_info, KERN_CRIT fmt, ##args) +#define btrfs_err_in_rcu(fs_info, fmt, args...) \ + btrfs_printk_in_rcu(fs_info, KERN_ERR fmt, ##args) +#define btrfs_warn_in_rcu(fs_info, fmt, args...) \ + btrfs_printk_in_rcu(fs_info, KERN_WARNING fmt, ##args) +#define btrfs_notice_in_rcu(fs_info, fmt, args...) \ + btrfs_printk_in_rcu(fs_info, KERN_NOTICE fmt, ##args) +#define btrfs_info_in_rcu(fs_info, fmt, args...) \ + btrfs_printk_in_rcu(fs_info, KERN_INFO fmt, ##args) + #ifdef DEBUG #define btrfs_debug(fs_info, fmt, args...) \ btrfs_printk(fs_info, KERN_DEBUG fmt, ##args) +#define btrfs_debug_in_rcu(fs_info, fmt, args...) \ + btrfs_printk_in_rcu(fs_info, KERN_DEBUG fmt, ##args) #else #define btrfs_debug(fs_info, fmt, args...) \ no_printk(KERN_DEBUG fmt, ##args) +#define btrfs_debug_in_rcu(fs_info, fmt, args...) \ + no_printk(KERN_DEBUG fmt, ##args) #endif +#define btrfs_printk_in_rcu(fs_info, fmt, args...) 
\ +do { \ + rcu_read_lock();\ + btrfs_printk(fs_info, fmt, ##args); \ + rcu_read_unlock(); \ +} while (0) + #ifdef CONFIG_BTRFS_ASSERT __cold -- 2.1.3 -- To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 6/7] btrfs: switch message printers to ratelimited variants
Signed-off-by: David Sterba--- fs/btrfs/disk-io.c | 21 ++--- fs/btrfs/extent-tree.c | 14 -- fs/btrfs/extent_io.c| 4 ++-- fs/btrfs/free-space-cache.c | 10 +- fs/btrfs/inode.c| 7 ++- fs/btrfs/scrub.c| 8 6 files changed, 31 insertions(+), 33 deletions(-) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index ba41faf623ce..72553cd9ed14 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -319,9 +319,9 @@ static int csum_tree_block(struct btrfs_fs_info *fs_info, memcpy(, result, csum_size); read_extent_buffer(buf, , 0, csum_size); - printk_ratelimited(KERN_WARNING - "BTRFS: %s checksum verify failed on %llu wanted %X found %X " - "level %d\n", + btrfs_warn_rl(fs_info, + "%s checksum verify failed on %llu wanted %X found %X " + "level %d", fs_info->sb->s_id, buf->start, val, found, btrfs_header_level(buf)); if (result != (char *)_result) @@ -368,9 +368,9 @@ static int verify_parent_transid(struct extent_io_tree *io_tree, ret = 0; goto out; } - printk_ratelimited(KERN_ERR - "BTRFS (device %s): parent transid verify failed on %llu wanted %llu found %llu\n", - eb->fs_info->sb->s_id, eb->start, + btrfs_err_rl(eb->fs_info, + "parent transid verify failed on %llu wanted %llu found %llu", + eb->start, parent_transid, btrfs_header_generation(eb)); ret = 1; @@ -629,15 +629,14 @@ static int btree_readpage_end_io_hook(struct btrfs_io_bio *io_bio, found_start = btrfs_header_bytenr(eb); if (found_start != eb->start) { - printk_ratelimited(KERN_ERR "BTRFS (device %s): bad tree block start " - "%llu %llu\n", - eb->fs_info->sb->s_id, found_start, eb->start); + btrfs_err_rl(eb->fs_info, "bad tree block start %llu %llu", + found_start, eb->start); ret = -EIO; goto err; } if (check_tree_block_fsid(root->fs_info, eb)) { - printk_ratelimited(KERN_ERR "BTRFS (device %s): bad fsid on block %llu\n", - eb->fs_info->sb->s_id, eb->start); + btrfs_err_rl(eb->fs_info, "bad fsid on block %llu", + eb->start); ret = -EIO; goto err; } diff --git a/fs/btrfs/extent-tree.c 
b/fs/btrfs/extent-tree.c index 9f9604201333..913a5836e604 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -8271,10 +8271,11 @@ static noinline int do_walk_down(struct btrfs_trans_handle *trans, ret = account_shared_subtree(trans, root, next, generation, level - 1); if (ret) { - printk_ratelimited(KERN_ERR "BTRFS: %s Error " + btrfs_err_rl(root->fs_info, + "Error " "%d accounting shared subtree. Quota " - "is out of sync, rescan required.\n", - root->fs_info->sb->s_id, ret); + "is out of sync, rescan required.", + ret); } } ret = btrfs_free_extent(trans, root, bytenr, blocksize, parent, @@ -8363,10 +8364,11 @@ static noinline int walk_up_proc(struct btrfs_trans_handle *trans, BUG_ON(ret); /* -ENOMEM */ ret = account_leaf_items(trans, root, eb); if (ret) { - printk_ratelimited(KERN_ERR "BTRFS: %s Error " + btrfs_err_rl(root->fs_info, + "error " "%d accounting leaf items. Quota " - "is out of sync, rescan required.\n", - root->fs_info->sb->s_id, ret); + "is out of sync, rescan required.", + ret); } } /* make block locked assertion in clean_tree_block happy */ diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 58b9ccdfe2e9..bb318b4e5dd0 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -96,8 +96,8
[PATCH 7/7] btrfs: switch more printks to our helpers
Convert the simple cases, not all functions provide a way to reach the fs_info. Also skipped debugging messages (print-tree, integrity checker and pr_debug) and messages that are printed from possibly unfinished mount. Signed-off-by: David Sterba--- fs/btrfs/disk-io.c | 12 ++-- fs/btrfs/extent_io.c | 18 ++ fs/btrfs/ioctl.c | 6 +++--- fs/btrfs/root-tree.c | 5 +++-- fs/btrfs/send.c | 2 +- fs/btrfs/volumes.c | 3 ++- 6 files changed, 25 insertions(+), 21 deletions(-) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 72553cd9ed14..5bc94d2d5699 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -2347,8 +2347,7 @@ static int btrfs_replay_log(struct btrfs_fs_info *fs_info, u64 bytenr = btrfs_super_log_root(disk_super); if (fs_devices->rw_devices == 0) { - printk(KERN_WARNING "BTRFS: log replay required " - "on RO media\n"); + btrfs_warn(fs_info, "log replay required on RO media"); return -EIO; } @@ -2363,12 +2362,12 @@ static int btrfs_replay_log(struct btrfs_fs_info *fs_info, log_tree_root->node = read_tree_block(tree_root, bytenr, fs_info->generation + 1); if (IS_ERR(log_tree_root->node)) { - printk(KERN_ERR "BTRFS: failed to read log tree\n"); + btrfs_warn(fs_info, "failed to read log tree"); ret = PTR_ERR(log_tree_root->node); kfree(log_tree_root); return ret; } else if (!extent_buffer_uptodate(log_tree_root->node)) { - printk(KERN_ERR "BTRFS: failed to read log tree\n"); + btrfs_err(fs_info, "failed to read log tree"); free_extent_buffer(log_tree_root->node); kfree(log_tree_root); return -EIO; @@ -3294,8 +3293,9 @@ static int write_dev_supers(struct btrfs_device *device, bh = __getblk(device->bdev, bytenr / 4096, BTRFS_SUPER_INFO_SIZE); if (!bh) { - printk(KERN_ERR "BTRFS: couldn't get super " - "buffer head for bytenr %Lu\n", bytenr); + btrfs_err(device->dev_root->fs_info, + "couldn't get super buffer head for bytenr %llu", + bytenr); errors++; continue; } diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index bb318b4e5dd0..c7a33913188b 
100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -5563,13 +5563,15 @@ void memcpy_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset, unsigned long src_i; if (src_offset + len > dst->len) { - printk(KERN_ERR "BTRFS: memmove bogus src_offset %lu move " - "len %lu dst len %lu\n", src_offset, len, dst->len); + btrfs_err(dst->fs_info, + "memmove bogus src_offset %lu move " + "len %lu dst len %lu", src_offset, len, dst->len); BUG_ON(1); } if (dst_offset + len > dst->len) { - printk(KERN_ERR "BTRFS: memmove bogus dst_offset %lu move " - "len %lu dst len %lu\n", dst_offset, len, dst->len); + btrfs_err(dst->fs_info, + "memmove bogus dst_offset %lu move " + "len %lu dst len %lu", dst_offset, len, dst->len); BUG_ON(1); } @@ -5609,13 +5611,13 @@ void memmove_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset, unsigned long src_i; if (src_offset + len > dst->len) { - printk(KERN_ERR "BTRFS: memmove bogus src_offset %lu move " - "len %lu len %lu\n", src_offset, len, dst->len); + btrfs_err(dst->fs_info, "memmove bogus src_offset %lu move " + "len %lu len %lu", src_offset, len, dst->len); BUG_ON(1); } if (dst_offset + len > dst->len) { - printk(KERN_ERR "BTRFS: memmove bogus dst_offset %lu move " - "len %lu len %lu\n", dst_offset, len, dst->len); + btrfs_err(dst->fs_info, "memmove bogus dst_offset %lu move " + "len %lu len %lu", dst_offset, len, dst->len); BUG_ON(1); } if (dst_offset < src_offset) { diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 8e9105af723e..2e520c635709 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -1342,7 +1342,7 @@ int btrfs_defrag_file(struct inode *inode, struct file *file, break; if (btrfs_defrag_cancelled(root->fs_info)) { - printk(KERN_DEBUG
[PATCH 2/7] btrfs: switch message printers to _in_rcu variants
Signed-off-by: David Sterba--- fs/btrfs/check-integrity.c | 4 ++-- fs/btrfs/dev-replace.c | 16 fs/btrfs/ioctl.c | 2 +- fs/btrfs/scrub.c | 14 +++--- fs/btrfs/volumes.c | 18 +- 5 files changed, 27 insertions(+), 27 deletions(-) diff --git a/fs/btrfs/check-integrity.c b/fs/btrfs/check-integrity.c index 541fbfaed276..3e20630db8aa 100644 --- a/fs/btrfs/check-integrity.c +++ b/fs/btrfs/check-integrity.c @@ -845,8 +845,8 @@ static int btrfsic_process_superblock_dev_mirror( superblock_tmp->never_written = 0; superblock_tmp->mirror_num = 1 + superblock_mirror_num; if (state->print_mask & BTRFSIC_PRINT_MASK_SUPERBLOCK_WRITE) - printk_in_rcu(KERN_INFO "New initial S-block (bdev %p, %s)" -" @%llu (%s/%llu/%d)\n", + btrfs_info_in_rcu(device->dev_root->fs_info, + "new initial S-block (bdev %p, %s) @%llu (%s/%llu/%d)", superblock_bdev, rcu_str_deref(device->name), dev_bytenr, dev_state->name, dev_bytenr, diff --git a/fs/btrfs/dev-replace.c b/fs/btrfs/dev-replace.c index e54dd5905cee..ede1c1ad0baf 100644 --- a/fs/btrfs/dev-replace.c +++ b/fs/btrfs/dev-replace.c @@ -379,8 +379,8 @@ int btrfs_dev_replace_start(struct btrfs_root *root, if (ret) btrfs_err(root->fs_info, "kobj add dev failed %d\n", ret); - printk_in_rcu(KERN_INFO - "BTRFS: dev_replace from %s (devid %llu) to %s started\n", + btrfs_info_in_rcu(root->fs_info, + "dev_replace from %s (devid %llu) to %s started", src_device->missing ? "" : rcu_str_deref(src_device->name), src_device->devid, @@ -523,8 +523,8 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info, src_device, tgt_device); } else { - printk_in_rcu(KERN_ERR - "BTRFS: btrfs_scrub_dev(%s, %llu, %s) failed %d\n", + btrfs_err_in_rcu(root->fs_info, + "btrfs_scrub_dev(%s, %llu, %s) failed %d", src_device->missing ? 
"" : rcu_str_deref(src_device->name), src_device->devid, @@ -540,8 +540,8 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info, return scrub_ret; } - printk_in_rcu(KERN_INFO - "BTRFS: dev_replace from %s (devid %llu) to %s finished\n", + btrfs_info_in_rcu(root->fs_info, + "dev_replace from %s (devid %llu) to %s finished", src_device->missing ? "" : rcu_str_deref(src_device->name), src_device->devid, @@ -809,8 +809,8 @@ static int btrfs_dev_replace_kthread(void *data) progress = status_args->status.progress_1000; kfree(status_args); progress = div_u64(progress, 10); - printk_in_rcu(KERN_INFO - "BTRFS: continuing dev_replace from %s (devid %llu) to %s @%u%%\n", + btrfs_info_in_rcu(fs_info, + "continuing dev_replace from %s (devid %llu) to %s @%u%%", dev_replace->srcdev->missing ? "" : rcu_str_deref(dev_replace->srcdev->name), dev_replace->srcdev->devid, diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 0adf5422fce9..8e9105af723e 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -1579,7 +1579,7 @@ static noinline int btrfs_ioctl_resize(struct file *file, new_size = div_u64(new_size, root->sectorsize); new_size *= root->sectorsize; - printk_in_rcu(KERN_INFO "BTRFS: new size for %s is %llu\n", + btrfs_info_in_rcu(root->fs_info, "new size for %s is %llu", rcu_str_deref(device->name), new_size); if (new_size > old_size) { diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c index a39f5d1144e8..26cfbb0b867c 100644 --- a/fs/btrfs/scrub.c +++ b/fs/btrfs/scrub.c @@ -580,9 +580,9 @@ static int scrub_print_warning_inode(u64 inum, u64 offset, u64 root, * hold all of the paths here */ for (i = 0; i < ipath->fspath->elem_cnt; ++i) - printk_in_rcu(KERN_WARNING "BTRFS: %s at logical %llu on dev " + btrfs_warn_in_rcu(fs_info, "%s at logical %llu on dev " "%s, sector %llu, root %llu, inode %llu, offset %llu, " - "length %llu, links %u (path: %s)\n",
[PATCH 4/7] btrfs: switch message printers to ratelimited _in_rcu variants
Signed-off-by: David Sterba--- fs/btrfs/disk-io.c | 4 ++-- fs/btrfs/extent_io.c | 4 ++-- fs/btrfs/scrub.c | 20 ++-- fs/btrfs/volumes.c | 4 ++-- 4 files changed, 16 insertions(+), 16 deletions(-) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 295795aebe0b..ba41faf623ce 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -3175,8 +3175,8 @@ static void btrfs_end_buffer_write_sync(struct buffer_head *bh, int uptodate) struct btrfs_device *device = (struct btrfs_device *) bh->b_private; - printk_ratelimited_in_rcu(KERN_WARNING "BTRFS: lost page write due to " - "I/O error on %s\n", + btrfs_warn_rl_in_rcu(device->dev_root->fs_info, + "lost page write due to IO error on %s", rcu_str_deref(device->name)); /* note, we dont' set_buffer_write_io_error because we have * our own ways of dealing with the IO errors diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index e2357e31609a..58b9ccdfe2e9 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -2078,8 +2078,8 @@ int repair_io_failure(struct inode *inode, u64 start, u64 length, u64 logical, return -EIO; } - printk_ratelimited_in_rcu(KERN_INFO - "BTRFS: read error corrected: ino %llu off %llu (dev %s sector %llu)\n", + btrfs_info_rl_in_rcu(fs_info, + "read error corrected: ino %llu off %llu (dev %s sector %llu)", btrfs_ino(inode), start, rcu_str_deref(dev->name), sector); bio_put(bio); diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c index 26cfbb0b867c..bce86f8772e1 100644 --- a/fs/btrfs/scrub.c +++ b/fs/btrfs/scrub.c @@ -850,8 +850,8 @@ static void scrub_fixup_nodatasum(struct btrfs_work *work) btrfs_dev_replace_stats_inc( >dev_root->fs_info->dev_replace. 
num_uncorrectable_read_errors); - printk_ratelimited_in_rcu(KERN_ERR "BTRFS: " - "unable to fixup (nodatasum) error at logical %llu on dev %s\n", + btrfs_err_rl_in_rcu(sctx->dev_root->fs_info, + "unable to fixup (nodatasum) error at logical %llu on dev %s", fixup->logical, rcu_str_deref(fixup->dev->name)); } @@ -1230,8 +1230,8 @@ static int scrub_handle_errored_block(struct scrub_block *sblock_to_check) sctx->stat.corrected_errors++; sblock_to_check->data_corrected = 1; spin_unlock(>stat_lock); - printk_ratelimited_in_rcu(KERN_ERR - "BTRFS: fixed up error at logical %llu on dev %s\n", + btrfs_err_rl_in_rcu(fs_info, + "fixed up error at logical %llu on dev %s", logical, rcu_str_deref(dev->name)); } } else { @@ -1239,8 +1239,8 @@ static int scrub_handle_errored_block(struct scrub_block *sblock_to_check) spin_lock(>stat_lock); sctx->stat.uncorrectable_errors++; spin_unlock(>stat_lock); - printk_ratelimited_in_rcu(KERN_ERR - "BTRFS: unable to fixup (regular) error at logical %llu on dev %s\n", + btrfs_err_rl_in_rcu(fs_info, + "unable to fixup (regular) error at logical %llu on dev %s", logical, rcu_str_deref(dev->name)); } @@ -2201,15 +2201,15 @@ static void scrub_missing_raid56_worker(struct btrfs_work *work) spin_lock(>stat_lock); sctx->stat.read_errors++; spin_unlock(>stat_lock); - printk_ratelimited_in_rcu(KERN_ERR - "BTRFS: I/O error rebulding logical %llu for dev %s\n", + btrfs_err_rl_in_rcu(fs_info, + "IO error rebuilding logical %llu for dev %s", logical, rcu_str_deref(dev->name)); } else if (sblock->header_error || sblock->checksum_error) { spin_lock(>stat_lock); sctx->stat.uncorrectable_errors++; spin_unlock(>stat_lock); - printk_ratelimited_in_rcu(KERN_ERR - "BTRFS: failed to rebuild valid logical %llu for dev %s\n", + btrfs_err_rl_in_rcu(fs_info, + "failed to rebuild valid logical %llu for dev %s", logical, rcu_str_deref(dev->name)); } else { scrub_write_block_to_dev_replace(sblock); diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 
b0dae14242e8..4f80986dbf3c 100644 ---
[PATCH 5/7] btrfs: introduce ratelimited variants of message printing functions
Signed-off-by: David Sterba--- fs/btrfs/ctree.h | 21 + 1 file changed, 21 insertions(+) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 9c0fd901edbe..bca42c5733a1 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -4075,6 +4075,23 @@ void btrfs_printk(const struct btrfs_fs_info *fs_info, const char *fmt, ...) #define btrfs_info_rl_in_rcu(fs_info, fmt, args...) \ btrfs_printk_rl_in_rcu(fs_info, KERN_INFO fmt, ##args) +/* + * Wrappers that use a ratelimited printk + */ +#define btrfs_emerg_rl(fs_info, fmt, args...) \ + btrfs_printk_ratelimited(fs_info, KERN_EMERG fmt, ##args) +#define btrfs_alert_rl(fs_info, fmt, args...) \ + btrfs_printk_ratelimited(fs_info, KERN_ALERT fmt, ##args) +#define btrfs_crit_rl(fs_info, fmt, args...) \ + btrfs_printk_ratelimited(fs_info, KERN_CRIT fmt, ##args) +#define btrfs_err_rl(fs_info, fmt, args...) \ + btrfs_printk_ratelimited(fs_info, KERN_ERR fmt, ##args) +#define btrfs_warn_rl(fs_info, fmt, args...) \ + btrfs_printk_ratelimited(fs_info, KERN_WARNING fmt, ##args) +#define btrfs_notice_rl(fs_info, fmt, args...) \ + btrfs_printk_ratelimited(fs_info, KERN_NOTICE fmt, ##args) +#define btrfs_info_rl(fs_info, fmt, args...) \ + btrfs_printk_ratelimited(fs_info, KERN_INFO fmt, ##args) #ifdef DEBUG #define btrfs_debug(fs_info, fmt, args...) \ btrfs_printk(fs_info, KERN_DEBUG fmt, ##args) @@ -4082,6 +4099,8 @@ void btrfs_printk(const struct btrfs_fs_info *fs_info, const char *fmt, ...) btrfs_printk_in_rcu(fs_info, KERN_DEBUG fmt, ##args) #define btrfs_debug_rl_in_rcu(fs_info, fmt, args...) \ btrfs_printk_rl_in_rcu(fs_info, KERN_DEBUG fmt, ##args) +#define btrfs_debug_rl(fs_info, fmt, args...) \ + btrfs_printk_ratelimited(fs_info, KERN_DEBUG fmt, ##args) #else #define btrfs_debug(fs_info, fmt, args...) \ no_printk(KERN_DEBUG fmt, ##args) @@ -4089,6 +4108,8 @@ void btrfs_printk(const struct btrfs_fs_info *fs_info, const char *fmt, ...) no_printk(KERN_DEBUG fmt, ##args) #define btrfs_debug_rl_in_rcu(fs_info, fmt, args...) 
\ no_printk(KERN_DEBUG fmt, ##args) +#define btrfs_debug_rl(fs_info, fmt, args...) \ + no_printk(KERN_DEBUG fmt, ##args) #endif #define btrfs_printk_in_rcu(fs_info, fmt, args...) \ -- 2.1.3 -- To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PULL][PATCH 0/7] Updates in message printers and formatters
Hi, this was inspired by Anand's recent post to change the printing format of the btrfs_err etc. helpers to include UUID. We do not use the helpers everywhere, so I've introduced all the missing variants that require e.g. printing the rcu_string (device name), are ratelimited or are both ratelimited and using rcu_string. First part introduces macros and switches existing users, the patch 7/7 converts some printks to the helpers (avoiding mostly debugging messages). In the end we should route all user-visible messages through some common formatter; this is preparatory work. The mount-time messages are still printk-based. Please consider for the 4.4 dev cycle. Thanks. The following changes since commit 9ffecb10283508260936b96022d4ee43a7798b4c: Linux 4.3-rc3 (2015-09-27 07:50:08 -0400) are available in the git repository at: git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux.git cleanup/messages for you to fetch changes up to f14d104dbdb5044dac9acd0e983ffb60f706c746: btrfs: switch more printks to our helpers (2015-10-08 13:08:03 +0200) David Sterba (7): btrfs: introduce _in_rcu variants of message printing functions btrfs: switch message printers to _in_rcu variants btrfs: introduce ratelimited _in_rcu variants of message printing functions btrfs: switch message printers to ratelimited _in_rcu variants btrfs: introduce ratelimited variants of message printing functions btrfs: switch message printers to ratelimited variants btrfs: switch more printks to our helpers fs/btrfs/check-integrity.c | 4 +-- fs/btrfs/ctree.h| 88 + fs/btrfs/dev-replace.c | 16 - fs/btrfs/disk-io.c | 37 ++- fs/btrfs/extent-tree.c | 14 fs/btrfs/extent_io.c| 26 +++--- fs/btrfs/free-space-cache.c | 10 +++--- fs/btrfs/inode.c| 7 ++-- fs/btrfs/ioctl.c| 8 ++--- fs/btrfs/root-tree.c| 5 +-- fs/btrfs/scrub.c| 42 +++--- fs/btrfs/send.c | 2 +- fs/btrfs/volumes.c | 25 ++--- 13 files changed, 187 insertions(+), 97 deletions(-) -- To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in the 
body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 3/7] btrfs: introduce ratelimited _in_rcu variants of message printing functions
Signed-off-by: David Sterba--- fs/btrfs/ctree.h | 38 ++ 1 file changed, 38 insertions(+) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 88acdffbe384..9c0fd901edbe 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -4057,16 +4057,38 @@ void btrfs_printk(const struct btrfs_fs_info *fs_info, const char *fmt, ...) #define btrfs_info_in_rcu(fs_info, fmt, args...) \ btrfs_printk_in_rcu(fs_info, KERN_INFO fmt, ##args) +/* + * Wrappers that use a ratelimited printk_in_rcu + */ +#define btrfs_emerg_rl_in_rcu(fs_info, fmt, args...) \ + btrfs_printk_rl_in_rcu(fs_info, KERN_EMERG fmt, ##args) +#define btrfs_alert_rl_in_rcu(fs_info, fmt, args...) \ + btrfs_printk_rl_in_rcu(fs_info, KERN_ALERT fmt, ##args) +#define btrfs_crit_rl_in_rcu(fs_info, fmt, args...) \ + btrfs_printk_rl_in_rcu(fs_info, KERN_CRIT fmt, ##args) +#define btrfs_err_rl_in_rcu(fs_info, fmt, args...) \ + btrfs_printk_rl_in_rcu(fs_info, KERN_ERR fmt, ##args) +#define btrfs_warn_rl_in_rcu(fs_info, fmt, args...) \ + btrfs_printk_rl_in_rcu(fs_info, KERN_WARNING fmt, ##args) +#define btrfs_notice_rl_in_rcu(fs_info, fmt, args...) \ + btrfs_printk_rl_in_rcu(fs_info, KERN_NOTICE fmt, ##args) +#define btrfs_info_rl_in_rcu(fs_info, fmt, args...) \ + btrfs_printk_rl_in_rcu(fs_info, KERN_INFO fmt, ##args) + #ifdef DEBUG #define btrfs_debug(fs_info, fmt, args...) \ btrfs_printk(fs_info, KERN_DEBUG fmt, ##args) #define btrfs_debug_in_rcu(fs_info, fmt, args...) \ btrfs_printk_in_rcu(fs_info, KERN_DEBUG fmt, ##args) +#define btrfs_debug_rl_in_rcu(fs_info, fmt, args...) \ + btrfs_printk_rl_in_rcu(fs_info, KERN_DEBUG fmt, ##args) #else #define btrfs_debug(fs_info, fmt, args...) \ no_printk(KERN_DEBUG fmt, ##args) #define btrfs_debug_in_rcu(fs_info, fmt, args...) \ no_printk(KERN_DEBUG fmt, ##args) +#define btrfs_debug_rl_in_rcu(fs_info, fmt, args...) \ + no_printk(KERN_DEBUG fmt, ##args) #endif #define btrfs_printk_in_rcu(fs_info, fmt, args...) 
\ @@ -4076,6 +4098,22 @@ do { \ rcu_read_unlock(); \ } while (0) +#define btrfs_printk_ratelimited(fs_info, fmt, args...)\ +do { \ + static DEFINE_RATELIMIT_STATE(_rs, \ + DEFAULT_RATELIMIT_INTERVAL, \ + DEFAULT_RATELIMIT_BURST); \ + if (__ratelimit(&_rs)) \ + btrfs_printk(fs_info, fmt, ##args); \ +} while (0) + +#define btrfs_printk_rl_in_rcu(fs_info, fmt, args...) \ +do { \ + rcu_read_lock();\ + btrfs_printk_ratelimited(fs_info, fmt, ##args); \ + rcu_read_unlock(); \ +} while (0) + #ifdef CONFIG_BTRFS_ASSERT __cold -- 2.1.3 -- To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] btrfs: clear bio reference after submit_one_bio()
Hi Naota, What happens if btrfs_bio_alloc() in submit_extent_page fails? Then we return -ENOMEM to the caller, but we do not set *bio_ret to NULL. And if *bio_ret was non-NULL upon entry into submit_extent_page, then we had submitted this bio before getting to btrfs_bio_alloc(). So should btrfs_bio_alloc() failure be handled in the same way? diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 3915c94..cd443bc 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -2834,8 +2834,11 @@ static int submit_extent_page(int rw, struct extent_io_tree *tree, bio = btrfs_bio_alloc(bdev, sector, BIO_MAX_PAGES, GFP_NOFS | __GFP_HIGH); - if (!bio) + if (!bio) { + if (bio_ret) + *bio_ret = NULL; return -ENOMEM; + } bio_add_page(bio, page, page_size, offset); bio->bi_end_io = end_io_func; Thanks, Alex. On Wed, Jan 7, 2015 at 12:46 AM, Satoru Takeuchiwrote: > Hi Naota, > > On 2015/01/06 1:01, Naohiro Aota wrote: >> After submit_one_bio(), `bio' can go away. However submit_extent_page() >> leave `bio' referable if submit_one_bio() failed (e.g. -ENOMEM on OOM). >> It will cause invalid paging request when submit_extent_page() is called >> next time. >> >> I reproduced ENOMEM case with the following script (need >> CONFIG_FAIL_PAGE_ALLOC, and CONFIG_FAULT_INJECTION_DEBUG_FS). > > I confirmed that this problem reproduce with 3.19-rc3 and > not reproduce with 3.19-rc3 with your patch. > > Tested-by: Satoru Takeuchi > > Thank you for reporting this problem with the reproducer > and fixing it too. > > NOTE: > I used v3.19-rc3's tools/testing/fault-injection/failcmd.sh > for the following "./failcmd.sh". 
> > >./failcmd.sh -p $percent -t $times -i $interval \ > >--ignore-gfp-highmem=N --ignore-gfp-wait=N > --min-order=0 \ > >-- \ > >cat $directory/file > /dev/null > > * 3.19-rc1 + your patch > > === > # ./run > 512+0 records in > 512+0 records out > # > === > > * 3.19-rc3 > > === > # ./run > 512+0 records in > 512+0 records out > [ 188.433726] run (776): drop_caches: 1 > [ 188.682372] FAULT_INJECTION: forcing a failure. > name fail_page_alloc, interval 100, probability 111000, space 0, times 3 > [ 188.689986] CPU: 0 PID: 954 Comm: cat Not tainted 3.19.0-rc3-ktest #1 > [ 188.693834] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS > Bochs 01/01/2011 > [ 188.698466] 0064 88007b343618 816e5563 > 88007fc0fc78 > [ 188.702730] 81c655c0 88007b343638 813851b5 > 0010 > [ 188.707043] 0002 88007b343768 81188126 > 88007b3435a8 > [ 188.711283] Call Trace: > [ 188.712620] [] dump_stack+0x45/0x57 > [ 188.715330] [] should_fail+0x135/0x140 > [ 188.718218] [] __alloc_pages_nodemask+0xd6/0xb30 > [ 188.721567] [] ? blk_rq_map_sg+0x35/0x170 > [ 188.724558] [] ? virtio_queue_rq+0x145/0x2b0 > [virtio_blk] > [ 188.728191] [] ? > btrfs_submit_compressed_read+0xcf/0x4d0 [btrfs] > [ 188.732079] [] ? kmem_cache_alloc+0x1cb/0x230 > [ 188.735153] [] ? mempool_alloc_slab+0x15/0x20 > [ 188.738188] [] alloc_pages_current+0x9a/0x120 > [ 188.741153] [] btrfs_submit_compressed_read+0x1a9/0x4d0 > [btrfs] > [ 188.744835] [] btrfs_submit_bio_hook+0x1c1/0x1d0 [btrfs] > [ 188.748225] [] ? lookup_extent_mapping+0x13/0x20 [btrfs] > [ 188.751547] [] ? btrfs_get_extent+0x98/0xad0 [btrfs] > [ 188.754656] [] submit_one_bio+0x67/0xa0 [btrfs] > [ 188.757554] [] submit_extent_page.isra.35+0xd7/0x1c0 > [btrfs] > [ 188.760981] [] __do_readpage+0x31d/0x7b0 [btrfs] > [ 188.763920] [] ? btrfs_create_repair_bio+0x110/0x110 > [btrfs] > [ 188.767382] [] ? btrfs_submit_direct+0x7b0/0x7b0 [btrfs] > [ 188.770671] [] ? 
btrfs_lookup_ordered_range+0x13d/0x180 > [btrfs] > [ 188.774366] [] > __extent_readpages.constprop.42+0x2ba/0x2d0 [btrfs] > [ 188.778031] [] ? btrfs_submit_direct+0x7b0/0x7b0 [btrfs] > [ 188.781241] [] extent_readpages+0x169/0x1b0 [btrfs] > [ 188.784322] [] ? btrfs_submit_direct+0x7b0/0x7b0 [btrfs] > [ 188.789014] [] btrfs_readpages+0x1f/0x30 [btrfs] > [ 188.792028] [] __do_page_cache_readahead+0x18c/0x1f0 > [ 188.795078] [] ondemand_readahead+0xdf/0x260 > [ 188.797702] [] ? btrfs_congested_fn+0x5f/0xa0 [btrfs] > [ 188.800718] [] page_cache_async_readahead+0x71/0xa0 > [ 188.803650] [] generic_file_read_iter+0x40f/0x5e0 > [ 188.806480] [] new_sync_read+0x7e/0xb0 > [ 188.808832] [] __vfs_read+0x18/0x50 > [ 188.811068] [] vfs_read+0x8a/0x140 > [ 188.813298] [] SyS_read+0x46/0xb0
filesystem goes ro trying to balance. "cpu stuck"
Kernel 4.2.2-1.el7.elrepo btrfs-progs v4.2.1 I'm attempting to convert a filesystem from raid6 to raid10. I didn't have any functional problems with it, but performance is abysmal compared to basically the same arrangement in raid10 so I thought I'd just get away from raid56 for a while (I also saw something about parity raid code developed beyond 2-disk parity that was ignored/thrown away so I'm thinking the devs don't care much about parity raid at least for now). Partway through the balance something goes wrong and the filesystem is forced read-only, stopping the balance. I did a fsck and it didn't complain about/find any errors. The drives aren't throwing any errors or incrementing any SMART attributes. This is a backup array, so it's not the end of the world if I have to just blow it away and rebuild as raid10 from scratch. The console prints this error. NMI watchdog: BUG: soft lockup - CPU#0 stuck for 22s! [btrfs-balance:8015] Here's the fun stuff out of dmesg [183120.853367] INFO: rcu_sched self-detected stall on CPU { 0} (t=7620235 jiffies g=3046202 c=3046201 q=0) [183120.856391] INFO: rcu_sched detected stalls on CPUs/tasks: { 0} (detected by 3, t=7620238 jiffies, g=3046202, c=3046201, q=0) [183120.856393] Task dump for CPU 0: [183120.856401] btrfs-balance R running task0 8015 2 0x0088 [183120.856407] 8800d8a6f8f8 816c9b6f 81a2b500 880036f4 [183120.856411] 88040d0d5140 8800d8a7 8804094c4620 8804094c4618 [183120.856414] 880036f4 8800d0e8b1a0 8800d8a6f918 816ca177 [183120.856416] Call Trace: [183120.856428] [] ? __schedule+0x2af/0x880 [183120.856435] [] schedule+0x37/0x80 [183120.856441] [] schedule_timeout+0x201/0x2a0 [183120.856448] [] ? wake_up_worker+0x24/0x30 [183120.856451] [] ? insert_work+0x62/0xa0 [183120.856457] [] ? __set_page_dirty_nobuffers+0xe7/0x140 [183120.856463] [] ? list_del+0x11/0x40 [183120.856468] [] wait_for_completion+0x111/0x130 [183120.856474] [] ? 
wake_up_q+0x80/0x80 [183120.856522] [] btrfs_async_run_delayed_refs+0x133/0x150 [btrfs] [183120.856527] [] ? __slab_free+0x11f/0x217 [183120.856573] [] ? invalidate_extent_cache+0x49/0x1a0 [btrfs] [183120.856579] [] ? kmem_cache_alloc+0x1c8/0x1f0 [183120.856615] [] ? btrfs_drop_snapshot+0x6c/0x850 [btrfs] [183120.856658] [] ? __del_reloc_root+0xb9/0xf0 [btrfs] [183120.856700] [] ? __del_reloc_root+0x41/0xf0 [btrfs] [183120.856742] [] ? __del_reloc_root+0x30/0xf0 [btrfs] [183120.856783] [] ? free_reloc_roots+0x25/0x40 [btrfs] [183120.856825] [] ? merge_reloc_roots+0x173/0x240 [btrfs] [183120.856869] [] ? relocate_block_group+0x265/0x640 [btrfs] [183120.856912] [] ? btrfs_relocate_block_group+0x1c3/0x2d0 [btrfs] [183120.856957] [] ? btrfs_relocate_chunk.isra.39+0x3e/0xc0 [btrfs] [183120.857001] [] ? __btrfs_balance+0x49e/0x8e0 [btrfs] [183120.857046] [] ? btrfs_balance+0x37d/0x650 [btrfs] [183120.857090] [] ? balance_kthread+0x5d/0x80 [btrfs] [183120.857134] [] ? btrfs_balance+0x650/0x650 [btrfs] [183120.857140] [] ? kthread+0xd8/0xf0 [183120.857146] [] ? kthread_create_on_node+0x1b0/0x1b0 [183120.857150] [] ? ret_from_fork+0x3f/0x70 [183120.857155] [] ? kthread_create_on_node+0x1b0/0x1b0 [183120.882383] Task dump for CPU 0: [183120.882385] btrfs-balance R running task0 8015 2 0x0088 [183120.882387] 880036f4 d292fc58 88041fc03d78 810a636f [183120.882390] 81a75300 88041fc03d98 810a8c4d [183120.882392] 0083 0001 88041fc03dc8 810da114 [183120.882394] Call Trace: [183120.882396][] sched_show_task+0xaf/0x110 [183120.882400] [] dump_cpu_task+0x3d/0x50 [183120.882404] [] rcu_dump_cpu_stacks+0x84/0xc0 [183120.882406] [] rcu_check_callbacks+0x4c2/0x7b0 [183120.882409] [] ? acct_account_cputime+0x1c/0x20 [183120.882412] [] ? account_system_time+0x83/0x120 [183120.882414] [] ? 
tick_sched_do_timer+0x50/0x50 [183120.882417] [] update_process_times+0x39/0x60 [183120.882420] [] tick_sched_handle.isra.17+0x25/0x60 [183120.882422] [] tick_sched_timer+0x44/0x80 [183120.882425] [] __hrtimer_run_queues+0xf3/0x220 [183120.882428] [] hrtimer_interrupt+0xa8/0x1a0 [183120.882430] [] local_apic_timer_interrupt+0x39/0x60 [183120.882433] [] smp_apic_timer_interrupt+0x45/0x60 [183120.882436] [] apic_timer_interrupt+0x6b/0x70 [183120.882437][] ? __del_reloc_root+0xb9/0xf0 [btrfs] [183120.882471] [] ? __del_reloc_root+0x41/0xf0 [btrfs] [183120.882488] [] ? __del_reloc_root+0x30/0xf0 [btrfs] [183120.882505] [] free_reloc_roots+0x25/0x40 [btrfs] [183120.882521] [] merge_reloc_roots+0x173/0x240 [btrfs] [183120.882539] [] relocate_block_group+0x265/0x640 [btrfs] [183120.882556] [] btrfs_relocate_block_group+0x1c3/0x2d0 [btrfs] [183120.882574] [] btrfs_relocate_chunk.isra.39+0x3e/0xc0 [btrfs] [183120.882591] [] __btrfs_balance+0x49e/0x8e0 [btrfs] [183120.882609] []
[PATCH 4/6] btrfs: add comments to barriers before waitqueue_active
Reduce number of undocumented barriers out there. Signed-off-by: David Sterba--- fs/btrfs/compression.c | 3 +++ fs/btrfs/extent-tree.c | 3 +-- fs/btrfs/locking.c | 3 +++ fs/btrfs/ordered-data.c | 6 ++ fs/btrfs/transaction.c | 3 +++ 5 files changed, 16 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c index 57ee8ca29b06..3a9317ce67f8 100644 --- a/fs/btrfs/compression.c +++ b/fs/btrfs/compression.c @@ -839,6 +839,9 @@ static void free_workspace(int type, struct list_head *workspace) btrfs_compress_op[idx]->free_workspace(workspace); atomic_dec(alloc_workspace); wake: + /* +* Make sure counter is updated before we wake up waiters. +*/ smp_mb(); if (waitqueue_active(workspace_wait)) wake_up(workspace_wait); diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 9f9604201333..59eb92f65c62 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -10363,8 +10363,7 @@ void btrfs_end_write_no_snapshoting(struct btrfs_root *root) { percpu_counter_dec(>subv_writers->counter); /* -* Make sure counter is updated before we wake up -* waiters. +* Make sure counter is updated before we wake up waiters. */ smp_mb(); if (waitqueue_active(>subv_writers->wait)) diff --git a/fs/btrfs/locking.c b/fs/btrfs/locking.c index d7e6baf1b205..03f8630dbaf2 100644 --- a/fs/btrfs/locking.c +++ b/fs/btrfs/locking.c @@ -280,6 +280,9 @@ void btrfs_tree_unlock(struct extent_buffer *eb) if (blockers) { WARN_ON(atomic_read(>spinning_writers)); atomic_dec(>blocking_writers); + /* +* Make sure counter is updated before we wake up waiters. 
+*/ smp_mb(); if (waitqueue_active(>write_lock_wq)) wake_up(>write_lock_wq); diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c index 52170cf1757e..071005f008c1 100644 --- a/fs/btrfs/ordered-data.c +++ b/fs/btrfs/ordered-data.c @@ -345,6 +345,9 @@ int btrfs_dec_test_first_ordered_pending(struct inode *inode, if (entry->bytes_left == 0) { ret = test_and_set_bit(BTRFS_ORDERED_IO_DONE, >flags); + /* +* Implicit memory barrier after test_and_set_bit +*/ if (waitqueue_active(>wait)) wake_up(>wait); } else { @@ -409,6 +412,9 @@ int btrfs_dec_test_ordered_pending(struct inode *inode, if (entry->bytes_left == 0) { ret = test_and_set_bit(BTRFS_ORDERED_IO_DONE, >flags); + /* +* Implicit memory barrier after test_and_set_bit +*/ if (waitqueue_active(>wait)) wake_up(>wait); } else { diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index e8e5b5a10719..3fd70f797b7d 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -861,6 +861,9 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans, atomic_dec(_trans->num_writers); extwriter_counter_dec(cur_trans, trans->type); + /* +* Make sure counter is updated before we wake up waiters. +*/ smp_mb(); if (waitqueue_active(_trans->writer_wait)) wake_up(_trans->writer_wait); -- 2.1.3 -- To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 1/6] btrfs: remove waitqueue_active check from btrfs_rm_dev_replace_unblocked
Normally the waitqueue_active would need a barrier, but this is not necessary here because it's not a performance-sensitive context and we can call wake_up directly. Suggested-by: Chris Mason Signed-off-by: David Sterba --- fs/btrfs/dev-replace.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/fs/btrfs/dev-replace.c b/fs/btrfs/dev-replace.c index e54dd5905cee..733ff75b620e 100644 --- a/fs/btrfs/dev-replace.c +++ b/fs/btrfs/dev-replace.c @@ -454,8 +454,7 @@ static void btrfs_rm_dev_replace_blocked(struct btrfs_fs_info *fs_info) static void btrfs_rm_dev_replace_unblocked(struct btrfs_fs_info *fs_info) { clear_bit(BTRFS_FS_STATE_DEV_REPLACING, &fs_info->fs_state); - if (waitqueue_active(&fs_info->replace_wait)) - wake_up(&fs_info->replace_wait); + wake_up(&fs_info->replace_wait); } static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info, -- 2.1.3 -- To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 3/6] btrfs: comment waitqueue_active implied by locks
Suggested-by: Chris Mason Signed-off-by: David Sterba --- fs/btrfs/raid56.c | 6 +- fs/btrfs/tree-log.c | 6 ++ 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/raid56.c b/fs/btrfs/raid56.c index fcf7265ca46f..1a33d3eb36de 100644 --- a/fs/btrfs/raid56.c +++ b/fs/btrfs/raid56.c @@ -810,7 +810,11 @@ static noinline void unlock_stripe(struct btrfs_raid_bio *rbio) } goto done_nolock; - } else if (waitqueue_active(>wait)) { + /* +* The barrier for this waitqueue_active is not needed, +* we're protected by h->lock and can't miss a wakeup. +*/ + } else if (waitqueue_active(>wait)) { spin_unlock(>bio_list_lock); spin_unlock_irqrestore(>lock, flags); wake_up(>wait); diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 1bbaace73383..d0deb4643502 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -2950,6 +2950,9 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, atomic_set(_root_tree->log_commit[index2], 0); mutex_unlock(_root_tree->log_mutex); + /* +* The barrier before waitqueue_active is implied by mutex_unlock +*/ if (waitqueue_active(_root_tree->log_commit_wait[index2])) wake_up(_root_tree->log_commit_wait[index2]); out: @@ -2961,6 +2964,9 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, atomic_set(>log_commit[index1], 0); mutex_unlock(>log_mutex); + /* +* The barrier before waitqueue_active is implied by mutex_unlock +*/ if (waitqueue_active(>log_commit_wait[index1])) wake_up(>log_commit_wait[index1]); return ret; -- 2.1.3 -- To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 6/6] btrfs: comment the rest of implicit barriers before waitqueue_active
There are atomic operations that imply the barrier for waitqueue_active mixed in an if-condition. Signed-off-by: David Sterba--- fs/btrfs/delayed-inode.c | 4 fs/btrfs/disk-io.c | 3 +++ fs/btrfs/inode.c | 3 +++ fs/btrfs/locking.c | 9 + fs/btrfs/volumes.c | 3 +++ 5 files changed, 22 insertions(+) diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c index a2ae42720a6a..e0941fbb913c 100644 --- a/fs/btrfs/delayed-inode.c +++ b/fs/btrfs/delayed-inode.c @@ -463,6 +463,10 @@ static int __btrfs_add_delayed_deletion_item(struct btrfs_delayed_node *node, static void finish_one_item(struct btrfs_delayed_root *delayed_root) { int seq = atomic_inc_return(_root->items_seq); + + /* +* atomic_dec_return implies a barrier for waitqueue_active +*/ if ((atomic_dec_return(_root->items) < BTRFS_DELAYED_BACKGROUND || seq % BTRFS_DELAYED_BATCH == 0) && waitqueue_active(_root->wait)) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 295795aebe0b..379526ffd84d 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -802,6 +802,9 @@ static void run_one_async_done(struct btrfs_work *work) limit = btrfs_async_submit_limit(fs_info); limit = limit * 2 / 3; + /* +* atomic_dec_return implies a barrier for waitqueue_active +*/ if (atomic_dec_return(_info->nr_async_submits) < limit && waitqueue_active(_info->async_submit_wait)) wake_up(_info->async_submit_wait); diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 611b66d73e80..7be4abe25e06 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -1096,6 +1096,9 @@ static noinline void async_cow_submit(struct btrfs_work *work) nr_pages = (async_cow->end - async_cow->start + PAGE_CACHE_SIZE) >> PAGE_CACHE_SHIFT; + /* +* atomic_sub_return implies a barrier for waitqueue_active +*/ if (atomic_sub_return(nr_pages, >fs_info->async_delalloc_pages) < 5 * 1024 * 1024 && waitqueue_active(>fs_info->async_submit_wait)) diff --git a/fs/btrfs/locking.c b/fs/btrfs/locking.c index 03f8630dbaf2..8077461fc56a 100644 --- a/fs/btrfs/locking.c 
+++ b/fs/btrfs/locking.c @@ -79,6 +79,9 @@ void btrfs_clear_lock_blocking_rw(struct extent_buffer *eb, int rw) write_lock(>lock); WARN_ON(atomic_read(>spinning_writers)); atomic_inc(>spinning_writers); + /* +* atomic_dec_and_test implies a barrier for waitqueue_active +*/ if (atomic_dec_and_test(>blocking_writers) && waitqueue_active(>write_lock_wq)) wake_up(>write_lock_wq); @@ -86,6 +89,9 @@ void btrfs_clear_lock_blocking_rw(struct extent_buffer *eb, int rw) BUG_ON(atomic_read(>blocking_readers) == 0); read_lock(>lock); atomic_inc(>spinning_readers); + /* +* atomic_dec_and_test implies a barrier for waitqueue_active +*/ if (atomic_dec_and_test(>blocking_readers) && waitqueue_active(>read_lock_wq)) wake_up(>read_lock_wq); @@ -229,6 +235,9 @@ void btrfs_tree_read_unlock_blocking(struct extent_buffer *eb) } btrfs_assert_tree_read_locked(eb); WARN_ON(atomic_read(>blocking_readers) == 0); + /* +* atomic_dec_and_test implies a barrier for waitqueue_active +*/ if (atomic_dec_and_test(>blocking_readers) && waitqueue_active(>read_lock_wq)) wake_up(>read_lock_wq); diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 6fc735869c18..ff3527192409 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -345,6 +345,9 @@ static noinline void run_scheduled_bios(struct btrfs_device *device) pending = pending->bi_next; cur->bi_next = NULL; + /* +* atomic_dec_return implies a barrier for waitqueue_active +*/ if (atomic_dec_return(_info->nr_async_bios) < limit && waitqueue_active(_info->async_submit_wait)) wake_up(_info->async_submit_wait); -- 2.1.3 -- To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 2/6] btrfs: add barrier for waitqueue_active in clear_btree_io_tree
waitqueue_active should be preceded by a barrier, in this function we don't need to call it all the time. Signed-off-by: David Sterba--- fs/btrfs/transaction.c | 6 ++ 1 file changed, 6 insertions(+) diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 74bc3338418b..e8e5b5a10719 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -82,6 +82,12 @@ void btrfs_put_transaction(struct btrfs_transaction *transaction) static void clear_btree_io_tree(struct extent_io_tree *tree) { spin_lock(>lock); + /* +* Do a single barrier for the waitqueue_active check here, the state +* of the waitqueue should not change once clear_btree_io_tree is +* called. +*/ + smp_mb(); while (!RB_EMPTY_ROOT(>state)) { struct rb_node *node; struct extent_state *state; -- 2.1.3 -- To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PULL][PATCH 0/6] Barriers around waitqueue_active (V2)
Hi, I've updated the main patch according to your comments in https://www.mail-archive.com/linux-btrfs%40vger.kernel.org/msg42551.html though with one exception, the barrier in btrfs_bio_counter_sub which seems to be in a performance-sensitive context but I did not find a good way to determine that dev-replace is running. It's protected by the shared status of 'mutually_exclusive_operation_running' and we'd have to do some other checks that would also imply some performance drop. I hope it's ok to skip that one for now, the remaining new barriers look safe and the rest is documenting the existing ones. Please consider pulling this for the 4.4 cycle. Thanks. The following changes since commit 9ffecb10283508260936b96022d4ee43a7798b4c: Linux 4.3-rc3 (2015-09-27 07:50:08 -0400) are available in the git repository at: git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux.git fix/waitqueue-barriers for you to fetch changes up to ee86395458072760d62e66aad10a5e9e8902b8cf: btrfs: comment the rest of implicit barriers before waitqueue_active (2015-10-10 18:42:00 +0200) David Sterba (6): btrfs: remove waitqueue_active check from btrfs_rm_dev_replace_unblocked btrfs: add barrier for waitqueue_active in clear_btree_io_tree btrfs: comment waitqueue_active implied by locks btrfs: add comments to barriers before waitqueue_active btrfs: remove extra barrier before waitqueue_active btrfs: comment the rest of implicit barriers before waitqueue_active fs/btrfs/compression.c | 3 +++ fs/btrfs/delayed-inode.c | 4 fs/btrfs/dev-replace.c | 3 +-- fs/btrfs/disk-io.c | 3 +++ fs/btrfs/extent-tree.c | 3 +-- fs/btrfs/inode.c | 3 +++ fs/btrfs/locking.c | 12 fs/btrfs/ordered-data.c | 6 ++ fs/btrfs/raid56.c| 6 +- fs/btrfs/transaction.c | 9 + fs/btrfs/tree-log.c | 14 -- fs/btrfs/volumes.c | 3 +++ 12 files changed, 62 insertions(+), 7 deletions(-) -- To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in the body of a message to majord...@vger.kernel.org More majordomo info at 
http://vger.kernel.org/majordomo-info.html
[PATCH 5/6] btrfs: remove extra barrier before waitqueue_active
Removing barriers is scary, but a call to atomic_dec_and_test implies a barrier, so we don't need to issue another one. Signed-off-by: David Sterba--- fs/btrfs/tree-log.c | 8 ++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index d0deb4643502..63275594debd 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -229,7 +229,9 @@ int btrfs_pin_log_trans(struct btrfs_root *root) void btrfs_end_log_trans(struct btrfs_root *root) { if (atomic_dec_and_test(>log_writers)) { - smp_mb(); + /* +* Implicit memory barrier after atomic_dec_and_test +*/ if (waitqueue_active(>log_writer_wait)) wake_up(>log_writer_wait); } @@ -2820,7 +2822,9 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, mutex_lock(_root_tree->log_mutex); if (atomic_dec_and_test(_root_tree->log_writers)) { - smp_mb(); + /* +* Implicit memory barrier after atomic_dec_and_test +*/ if (waitqueue_active(_root_tree->log_writer_wait)) wake_up(_root_tree->log_writer_wait); } -- 2.1.3 -- To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
BTRFS with 8TB SMR drives
Hi guys, just added a new Seagate Archive 8TB drive to my BTRFS volume and I'm getting a tonne of errors when balancing or scrubbing. A short smartctl test reports fine, running a long one now. Will also run seatools from a bootable DOS USB while at work today. Running latest firmware on my 9240-8i which explicitly supports this drive. I'm finding it very hard to tell if SMR drives are OK with BTRFS currently - anyone chime in? Thanks, Warren [wsh@cloud storcli]$ uname -a Linux cloud.warrenhughes.net 4.1.10-2-lts #1 SMP Wed Oct 7 21:57:44 CEST 2015 x86_64 GNU/Linux [wsh@cloud storcli]$ sudo btrfs version btrfs-progs v4.2.1 [wsh@cloud ~]$ sudo btrfs scrub status /mnt/media scrub status for 643c3145-8371-4011-8c34-20240e1bbaff scrub started at Sun Oct 11 20:37:38 2015 and was aborted after 10:35:47 total bytes scrubbed: 8.15TiB with 104218141 errors error details: read=98736175 csum=5481966 corrected errors: 5484382, uncorrectable errors: 98733759, unverified errors: 0 [/dev/sdo].write_io_errs 100154203 [/dev/sdo].read_io_errs98735251 [/dev/sdo].flush_io_errs 634 [/dev/sdo].corruption_errs 5481966 [/dev/sdo].generation_errs 0 [wsh@cloud ~]$ sudo smartctl -H -T permissive /dev/sdo smartctl 6.4 2015-06-04 r4109 [x86_64-linux-4.1.10-2-lts] (local build) Copyright (C) 2002-15, Bruce Allen, Christian Franke, www.smartmontools.org Short INQUIRY response, skip product id === START OF READ SMART DATA SECTION === SMART Health Status: OK [wsh@cloud storcli]$ sudo ./storcli64 /c0 show Product Name = LSI MegaRAID SAS 9240-8i Serial Number = P51010 SAS Address = 500605b004e9d030 PCI Address = 00:03:00:00 System Time = 10/12/2015 07:38:22 Mfg. 
Date = 03/17/10 Controller Time = 10/12/2015 07:38:20 FW Package Build = 20.13.1-0240 BIOS Version = 4.38.02.2_4.16.08.00_0x06060A05 FW Version = 2.130.404-4659 Driver Name = megaraid_sas Driver Version = 06.806.08.00-rc1 Vendor Id = 0x1000 Device Id = 0x73 SubVendor Id = 0x1000 SubDevice Id = 0x9240 Host Interface = PCI-E Device Interface = SAS-6G Bus Number = 3 Device Number = 0 Function Number = 0 Physical Drives = 7 PD LIST : === --- EID:Slt DID State DG Size Intf Med SED PI SeSz ModelSp --- 64:1 4 JBOD - 1.363 TB SATA HDD N N 512B WDC WD15EADS-00P8B0 U 64:2 0 JBOD - 2.728 TB SATA HDD N N 512B WDC WD30EFRX-68AX9N0 U 64:3 7 JBOD - 2.728 TB SATA HDD N N 512B ST3000DM001-1CH166 U 64:4 6 JBOD - 2.728 TB SATA HDD N N 512B WDC WD30EFRX-68AX9N0 U 64:5 5 JBOD - 2.728 TB SATA HDD N N 512B WDC WD30EFRX-68EUZN0 U 64:6 3 JBOD - 2.728 TB SATA HDD N N 512B WDC WD30EFRX-68AX9N0 U 64:7 2 JBOD - 2.728 TB SATA HDD N N 512B WDC WD30EFRX-68AX9N0 U --- -- To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: kernel BUG at /linux/fs/btrfs/extent-tree.c:1833!
the output of btrfs check --readonly /dev/sdb http://pastebin.com/UxkeVd7Y many entries with "extent buffer leak" the output of btrfs-show-super -i0 /dev/sd[bcd] && btrfs-show-super -i1 /dev/sd[bcd] && btrfs-show-super -i2 /dev/sd[bcd] http://pastebin.com/zs7B8827 http://pastebin.com/Kn1kwgYv http://pastebin.com/CHC52ef7 -- To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: kernel BUG at /linux/fs/btrfs/extent-tree.c:1833!
Ok, that's what i expected. :) if it will work :) -- To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: kernel BUG at /linux/fs/btrfs/extent-tree.c:1833!
Hello Peter, I have the same problem you have, as reported ~1 month ago on this mailing-list. My setup is 2 disks, and I tried balancing after adding a third one, in a raid5 configuration. I also have some "extent buffer leak" in my btrfsck, but it's hard to say if it can be the cause. If you look at the source code you'll see that those messages are not printed by the main checking routine but by a helper subroutine, I'm not even sure it represents a problem on the filesystem (maybe somebody can shed some light here). I tried with the 4.3-rc2 kernel, and the kernel bug is still there, unfortunately. I also posted an ftrace of the bug, hopefully somebody with enough btrfs knowledge will have a look. I reproduced this bug dozens of times, and as far as I can tell I never lost any single byte because of the crash, probably thanks to the transaction system of btrfs, so, at least there's that. -- Stéphane. Le 11 octobre 2015 22:50:07 Peter Becker a écrit : the output of btrfs check --readonly /dev/sdb http://pastebin.com/UxkeVd7Y many entrys with "extent buffer leak" the output of btrfs-show-super -i0 /dev/sd[bcd] && btrfs-show-super -i1 /dev/sd[bcd] && btrfs-show-super -i2 /dev/sd[bcd] http://pastebin.com/zs7B8827 http://pastebin.com/Kn1kwgYv http://pastebin.com/CHC52ef7 -- To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html -- To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: BTRFS with 8TB SMR drives
Warren Hughes warrenhughes.net> writes: > > Hi guys, just added a new Seagate Archive 8TB drive to my BTRFS volume > and I'm getting a tonne of errors when balancing or scrubbing. > > A short smartctl test reports fine, running a long one now. Will also > run seatools from a bootable DOS USB while at work today. > > Running latest firmware on my 9240-8i which explicitly supports this drive. > > I'm finding it very hard to tell if SMR drives are OK with BTRFS > currently - anyone chime in? > > Thanks, Warren > > [wsh cloud storcli]$ uname -a > Linux cloud.warrenhughes.net 4.1.10-2-lts #1 SMP Wed Oct 7 21:57:44 > CEST 2015 x86_64 GNU/Linux > > [wsh cloud storcli]$ sudo btrfs version > btrfs-progs v4.2.1 > > [wsh cloud ~]$ sudo btrfs scrub status /mnt/media > scrub status for 643c3145-8371-4011-8c34-20240e1bbaff > scrub started at Sun Oct 11 20:37:38 2015 and was aborted after 10:35:47 > total bytes scrubbed: 8.15TiB with 104218141 errors > error details: read=98736175 csum=5481966 > corrected errors: 5484382, uncorrectable errors: 98733759, > unverified errors: 0 > > [/dev/sdo].write_io_errs 100154203 > [/dev/sdo].read_io_errs98735251 > [/dev/sdo].flush_io_errs 634 > [/dev/sdo].corruption_errs 5481966 > [/dev/sdo].generation_errs 0 > hi Warren, I recently (last week) built a 3 disk RAID 5 array using the same 8TB drives which worked fine holding ~12TB then added a 4th disk using a JMicron PCI SATA controller. I then ran a balance which failed after just over 1TB written to the 4th disk. This caused the entire array to fail but the main difference to your scenario was that the 4th disk also wasn't reporting to SMART properly. I then moved all 4 disks onto the motherboard based SATA controller, built the array fresh and have copied ~18TB onto it and it seems to be working fine. 
Perhaps I should try a scrub and see :) I'm using Centos 7.1 but kernel 4.2.1-ml and btrfs-progs 4.2.2 Kristan -- To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: BTRFS with 8TB SMR drives
Thanks Kristan, a scrub would be great; mine appeared to be working fine until the scrub (although I hadn't yet run a balance on it so who knows). I might move my 8TB onto the motherboard controller and see if the situation improves. Will update here tonight. Cheers, W. On 12 October 2015 at 11:53, Kristanwrote: > Warren Hughes warrenhughes.net> writes: > >> >> Hi guys, just added a new Seagate Archive 8TB drive to my BTRFS volume >> and I'm getting a tonne of errors when balancing or scrubbing. >> >> A short smartctl test reports fine, running a long one now. Will also >> run seatools from a bootable DOS USB while at work today. >> >> Running latest firmware on my 9240-8i which explicitly supports this > drive. >> >> I'm finding it very hard to tell if SMR drives are OK with BTRFS >> currently - anyone chime in? >> >> Thanks, Warren >> >> [wsh cloud storcli]$ uname -a >> Linux cloud.warrenhughes.net 4.1.10-2-lts #1 SMP Wed Oct 7 21:57:44 >> CEST 2015 x86_64 GNU/Linux >> >> [wsh cloud storcli]$ sudo btrfs version >> btrfs-progs v4.2.1 >> >> [wsh cloud ~]$ sudo btrfs scrub status /mnt/media >> scrub status for 643c3145-8371-4011-8c34-20240e1bbaff >> scrub started at Sun Oct 11 20:37:38 2015 and was aborted > after 10:35:47 >> total bytes scrubbed: 8.15TiB with 104218141 errors >> error details: read=98736175 csum=5481966 >> corrected errors: 5484382, uncorrectable errors: 98733759, >> unverified errors: 0 >> >> [/dev/sdo].write_io_errs 100154203 >> [/dev/sdo].read_io_errs98735251 >> [/dev/sdo].flush_io_errs 634 >> [/dev/sdo].corruption_errs 5481966 >> [/dev/sdo].generation_errs 0 >> > > hi Warren, > > I recently (last week) built a 3 disk RAID 5 array using the same 8TB > drives which worked fine holding ~12TB then added a 4th disk using a > JMicron PCI SATA controller. I then ran a balance which failed after > just over 1TB written to the 4th disk. 
This caused the entire array to > fail but the main difference to your scenario was that the 4th disk also > wasn't reporting to SMART properly. > I then moved all 4 disks onto the motherboard based SATA controller, > built the array fresh and have copied ~18TB onto it and it seems to be > working fine. Perhaps I should try a scrub and see :) > > I'm using Centos 7.1 but kernel 4.2.1-ml and btrfs-progs 4.2.2 > Kristan > > -- > To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in > the body of a message to majord...@vger.kernel.org > More majordomo info at http://vger.kernel.org/majordomo-info.html -- Warren Hughes +64 21 633324 IM: gtalk + msn: this email address, skype: akawsh -- To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] btrfs: remove empty header file extent-tree.h
Ping? Any comment? Thanks, Qu Qu Wenruo wrote on 2015/09/29 09:51 +0800: Hi Chris, Would you please merge this patch? The empty header is introduced by my qgroup accounting rework, and the cleanup patch is missed in 4.2. Thanks, Qu Qu Wenruo wrote on 2015/07/03 09:17 +0800: The empty file is introduced by a careless 'git add', remove it. Reported-by: David Sterba Signed-off-by: Qu Wenruo --- fs/btrfs/extent-tree.h | 0 1 file changed, 0 insertions(+), 0 deletions(-) delete mode 100644 fs/btrfs/extent-tree.h diff --git a/fs/btrfs/extent-tree.h b/fs/btrfs/extent-tree.h deleted file mode 100644 index e69de29..000 -- To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html -- To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: BTRFS with 8TB SMR drives
Hopefully this is of use - it's a beast; 34MB when uncompressed https://drive.google.com/file/d/0B74Kimpwe3nYYUZ2YTMtQXB4V1U/view?usp=sharing On 12 October 2015 at 14:43, Chris Murphy wrote: > Is it possible to get a complete dmesg included in the thread, or if > it's too big attach it to a bug report? I'm curious if there are any > libata messages, as well as the specific Btrfs messages. > > > --- > Chris Murphy -- Warren Hughes +64 21 633324 IM: gtalk + msn: this email address, skype: akawsh -- To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: BTRFS with 8TB SMR drives
more info for anyone interested: [wsh@cloud ~]$ sudo btrfs fi df /mnt/media Data, RAID1: total=13.64TiB, used=13.61TiB System, RAID1: total=32.00MiB, used=2.22MiB Metadata, RAID1: total=16.00GiB, used=15.10GiB GlobalReserve, single: total=512.00MiB, used=0.00B [wsh@cloud ~]$ sudo btrfs fi sh /mnt/media Label: none uuid: 643c3145-8371-4011-8c34-20240e1bbaff Total devices 11 FS bytes used 13.63TiB devid8 size 2.73TiB used 2.54TiB path /dev/sdh devid9 size 2.73TiB used 2.54TiB path /dev/sdc devid 10 size 2.73TiB used 2.54TiB path /dev/sdf devid 11 size 1.82TiB used 1.63TiB path /dev/sdn devid 12 size 2.73TiB used 2.54TiB path /dev/sdg devid 14 size 2.73TiB used 2.54TiB path /dev/sda devid 15 size 2.73TiB used 2.54TiB path /dev/sdd devid 16 size 2.73TiB used 2.54TiB path /dev/sdk devid 17 size 2.73TiB used 2.54TiB path /dev/sdl devid 18 size 3.64TiB used 3.45TiB path /dev/sdm devid 19 size 7.28TiB used 1.93TiB path /dev/sdo btrfs-progs v4.2.1 On 12 October 2015 at 14:43, Chris Murphywrote: > Is it possible to get a complete dmesg included in the thread, or if > it's too big attach it to a bug report? I'm curious if there are any > libata messages, as well as the specific Btrfs messages. > > > --- > Chris Murphy -- Warren Hughes +64 21 633324 IM: gtalk + msn: this email address, skype: akawsh -- To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: BTRFS with 8TB SMR drives
Is it possible to get a complete dmesg included in the thread, or if it's too big attach it to a bug report? I'm curious if there are any libata messages, as well as the specific Btrfs messages. --- Chris Murphy -- To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Questions about FIEMAP
Hello everyone, After googling a bit, I found information that btrfs supports FIEMAP (as "cp" needs it), but it's not valid for "write" operations. I guess we cannot write to the block device directly after getting the block list using FIEMAP. This is because of: 1. The COW feature of btrfs (but this can be disabled using NOCOW) 2. File system rebalance 3. Defragmentation Aren't items #2 and #3 also a problem for "read" operations? For example, after "cp" gets the block list using FIEMAP, a file system rebalance occurs, so the previous result of FIEMAP is not valid anymore. Or maybe I misunderstood something. Please correct me. Thanks Mike -- To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: filesystem goes ro trying to balance. "cpu stuck"
Donald Pearson posted on Sun, 11 Oct 2015 11:46:14 -0500 as excerpted: > Kernel 4.2.2-1.el7.elrepo btrfs-progs v4.2.1 > > I'm attempting to convert a filesystem from raid6 to raid10. I didn't > have any functional problems with it, but performance is abysmal > compared to basically the same arrangement in raid10 so I thought I'd > just get away from raid56 for a while (I also saw something about parity > raid code developed beyond 2-disk parity that was ignored/thrown away so > I'm thinking the devs don't care much about about parity raid at least > for now). Note on the parity-raid story: AFAIK at least the btrfs folks aren't ignoring it (I don't know about the mdraid/dmraid folks). There's simply more opportunities for new features than there are coders to code them up, and while progress is indeed occurring, some of these features may well take years. Consider, even standard raid56 support was originally planned for IIRC 3.5, but it wasn't actually added until (IIRC) 3.9, and that was only partial/runtime support (the parities were being calculated and written, but the tools to rebuild from parity were incomplete/broken/non-existent, so it was effectively a slow raid0 in terms of reliability, that would be upgraded to raid56 "for free" once the tools were done). Complete raid56 support wasn't even nominally there until 3.19, with the initial bugs still being worked out thru 4.0 and into 4.1. So it took about /three/ /years/ longer than initially planned. This sort of longer-to-implement-than-planned pattern has repeated multiple times over the life of btrfs, which is why it's taking so long to mature and stabilize. 
So it's not that multi-parity-raid is being rejected or ignored, it's simply that there's way more to do than people to do it, and btrfs as a cow-based filesystem isn't exactly the simplest thing to implement correctly, so initial plans turned out to be /wildly/ optimistic, and honestly, some of these features, while not rejected, could well be a decade out. Obviously others will be implemented before then, but there's just so many, and so few devs working on what really is a complex project, so something ends up being shoved back to that decade out, and that's the way it's going to be unless btrfs suddenly gets way more developer resources working on it than it has now. > Partway through the balance something goes wrong and filesystem is > forced read-only stopping the balance. > > I did a fschk and it didn't complain about/find any errors. The drives > aren't throwing any errors or incrementing any smart attributes. This > is a backup array, so it's not the end of the world if I have to just > blow it away and rebuild as raid10 from scratch. > > The console prints this error. > NMI watchdog: BUG: soft lockup - CPU#0 stuck for 22s! > [btrfs-balance:8015] I'm a user not a dev, tho I am a regular on this list, and backtraces don't mean a lot to me, so take this FWIW... 1) How old is the filesystem? It isn't quite new, created with mkfs.btrfs from btrfs-progs v4.2.0 or v4.2.1, is it? There's a known mkfs.btrfs bug along in there, that I don't remember whether it's fixed by 4.2.1 or only the latest 4.2.2, but it creates invalid filesystems. Btrfs check from 4.2.2 can detect the problem, but can't fix it, and as the filesystems as they are are unstable, it's best to get what you need off of them and recreate them with a non-buggy mkfs.btrfs ASAP. 2) Since you're on progs v4.2.1 ATM, that may apply to its mkfs.btrfs as well. Please upgrade to 4.2.2 before creating any further btrfs, or failing that, downgrade to 4.1.3 or whatever the last in the progs 4.1 series was. 
3) Are you running btrfs quotas on the filesystem? Unfortunately, btrfs quota handling code remains an unstable sore spot, tho they're continuing to work hard on fixing it. I'm continuing to recommend, as I have for some time now, that people don't use it unless they're willing to deal with the problems and are actively working with the devs to fix them. Otherwise, either they need quota support and should really choose a filesystem where the feature is mature and stable, or they don't, in which case just leaving it off (or turning it off if on) avoids the problem. There's at least two confirmed reasonably recent cases where turning off btrfs quota support eliminated the issues people were reporting, so this isn't an idle recommendation, it really does help in at least some cases. If you don't really need quotas, leave (or turn) them off. If you do, you really should be using a filesystem where the quota feature is mature and stable enough to rely on. Yes, it does make a difference. 4) Snapshots (scaling). While snapshots are a reasonably mature feature, they do remain a scaling challenge. My recommendation is that you try to keep to about 250-ish snapshots per subvolume, no more than 3000 snapshots worst-case total, and better no more than 1000 or
[PATCH] btrfs-progs: Add all missing close_ctree and btrfs_close_all_devices
This patch add all missing close_ctree and btrfs_close_all_devices to several tools in btrfs progs, to avoid memory leak. Signed-off-by: Zhao Lei--- btrfs-calc-size.c| 1 + btrfs-debug-tree.c | 5 - btrfs-find-root.c| 1 + btrfs-map-logical.c | 1 + btrfs-select-super.c | 3 +++ btrfstune.c | 1 + cmds-filesystem.c| 14 -- cmds-property.c | 2 ++ cmds-rescue.c| 5 - cmds-restore.c | 1 + cmds-scrub.c | 4 +++- 11 files changed, 29 insertions(+), 9 deletions(-) diff --git a/btrfs-calc-size.c b/btrfs-calc-size.c index 7287858..b756693 100644 --- a/btrfs-calc-size.c +++ b/btrfs-calc-size.c @@ -508,5 +508,6 @@ int main(int argc, char **argv) out: close_ctree(root); free(roots); + btrfs_close_all_devices(); return ret; } diff --git a/btrfs-debug-tree.c b/btrfs-debug-tree.c index 7d8e876..8adc39f 100644 --- a/btrfs-debug-tree.c +++ b/btrfs-debug-tree.c @@ -28,6 +28,7 @@ #include "disk-io.h" #include "print-tree.h" #include "transaction.h" +#include "volumes.h" #include "utils.h" static int print_usage(int ret) @@ -428,5 +429,7 @@ no_node: printf("uuid %s\n", uuidbuf); printf("%s\n", PACKAGE_STRING); close_root: - return close_ctree(root); + ret = close_ctree(root); + btrfs_close_all_devices(); + return ret; } diff --git a/btrfs-find-root.c b/btrfs-find-root.c index 01b3603..fc3812c 100644 --- a/btrfs-find-root.c +++ b/btrfs-find-root.c @@ -216,5 +216,6 @@ int main(int argc, char **argv) out: btrfs_find_root_free(); close_ctree(root); + btrfs_close_all_devices(); return ret; } diff --git a/btrfs-map-logical.c b/btrfs-map-logical.c index d9fa6b2..deccb03 100644 --- a/btrfs-map-logical.c +++ b/btrfs-map-logical.c @@ -357,6 +357,7 @@ out_close_fd: close: free(output_file); close_ctree(root); + btrfs_close_all_devices(); if (ret < 0) ret = 1; return ret; diff --git a/btrfs-select-super.c b/btrfs-select-super.c index b790f3e..df74153 100644 --- a/btrfs-select-super.c +++ b/btrfs-select-super.c @@ -23,6 +23,7 @@ #include #include "kerncompat.h" #include "ctree.h" +#include "volumes.h" 
#include "disk-io.h" #include "print-tree.h" #include "transaction.h" @@ -101,5 +102,7 @@ int main(int ac, char **av) */ printf("using SB copy %llu, bytenr %llu\n", (unsigned long long)num, (unsigned long long)bytenr); + close_ctree(root); + btrfs_close_all_devices(); return ret; } diff --git a/btrfstune.c b/btrfstune.c index c248ee6..0907aa9 100644 --- a/btrfstune.c +++ b/btrfstune.c @@ -548,6 +548,7 @@ int main(int argc, char *argv[]) } out: close_ctree(root); + btrfs_close_all_devices(); return ret; } diff --git a/cmds-filesystem.c b/cmds-filesystem.c index 3663734..a14cb85 100644 --- a/cmds-filesystem.c +++ b/cmds-filesystem.c @@ -918,6 +918,7 @@ devs_only: } out: printf("%s\n", PACKAGE_STRING); + btrfs_close_all_devices(); free_seen_fsid(); return ret; } @@ -1279,21 +1280,22 @@ static const char * const cmd_filesystem_label_usage[] = { static int cmd_filesystem_label(int argc, char **argv) { + int ret; + if (check_argc_min(argc, 2) || check_argc_max(argc, 3)) usage(cmd_filesystem_label_usage); if (argc > 2) { - return set_label(argv[1], argv[2]); + ret = set_label(argv[1], argv[2]); } else { char label[BTRFS_LABEL_SIZE]; - int ret; - ret = get_label(argv[1], label); if (!ret) - fprintf(stdout, "%s\n", label); - - return ret; + printf("%s\n", label); } + + btrfs_close_all_devices(); + return ret; } static const char filesystem_cmd_group_info[] = diff --git a/cmds-property.c b/cmds-property.c index 0ffd250..f8c2823 100644 --- a/cmds-property.c +++ b/cmds-property.c @@ -22,6 +22,7 @@ #include #include +#include "volumes.h" #include "commands.h" #include "props.h" #include "ctree.h" @@ -319,6 +320,7 @@ static int setget_prop(int types, const char *object, else ret = 0; + btrfs_close_all_devices(); out: return ret; diff --git a/cmds-rescue.c b/cmds-rescue.c index fb3227b..4885b8a 100644 --- a/cmds-rescue.c +++ b/cmds-rescue.c @@ -19,6 +19,7 @@ #include "kerncompat.h" #include +#include "volumes.h" #include "ctree.h" #include "transaction.h" #include "disk-io.h" @@ 
-101,6 +102,7 @@ static int cmd_rescue_chunk_recover(int argc, char *argv[]) } else { fprintf(stdout, "Fail to recover the chunk tree.\n"); } + btrfs_close_all_devices(); return ret; } @@ -149,6 +151,7 @@ static int cmd_rescue_super_recover(int