Re: [PATCH V2] mm/slab: add a leak decoder callback
On wed, 16 Jan 2013 11:03:13 +0800, Liu Bo wrote: > This adds a leak decoder callback so that slab destruction > can use to generate debugging output for the allocated objects. > > Callers like btrfs are using their own leak tracking which will > manage allocated objects in a list(or something else), this does > indeed the same thing as what slab does. So adding a callback > for leak tracking can avoid this as well as runtime overhead. If the slab is merged with the other one, this patch can work well? Thanks Miao > (The idea is from Zach Brown .) > > Signed-off-by: Liu Bo > --- > v2: add a wrapper API for slab destruction to make decoder only > work in particular path. > > fs/btrfs/extent_io.c | 26 -- > fs/btrfs/extent_map.c| 13 - > include/linux/slab.h |2 ++ > include/linux/slab_def.h |1 + > include/linux/slub_def.h |1 + > mm/slab_common.c | 17 - > mm/slub.c|2 ++ > 7 files changed, 58 insertions(+), 4 deletions(-) > > diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c > index bcc8dff..355c7fc 100644 > --- a/fs/btrfs/extent_io.c > +++ b/fs/btrfs/extent_io.c > @@ -63,6 +63,26 @@ tree_fs_info(struct extent_io_tree *tree) > return btrfs_sb(tree->mapping->host->i_sb); > } > > +static void extent_state_leak_decoder(void *object) > +{ > + struct extent_state *state = object; > + > + printk(KERN_ERR "btrfs state leak: start %llu end %llu " > +"state %lu in tree %p refs %d\n", > +(unsigned long long)state->start, > +(unsigned long long)state->end, > +state->state, state->tree, atomic_read(&state->refs)); > +} > + > +static void extent_buffer_leak_decoder(void *object) > +{ > + struct extent_buffer *eb = object; > + > + printk(KERN_ERR "btrfs buffer leak start %llu len %lu " > +"refs %d\n", (unsigned long long)eb->start, > +eb->len, atomic_read(&eb->refs)); > +} > + > int __init extent_io_init(void) > { > extent_state_cache = kmem_cache_create("btrfs_extent_state", > @@ -115,9 +135,11 @@ void extent_io_exit(void) >*/ > rcu_barrier(); > if (extent_state_cache) > - 
kmem_cache_destroy(extent_state_cache); > + kmem_cache_destroy_decoder(extent_state_cache, > +extent_state_leak_decoder); > if (extent_buffer_cache) > - kmem_cache_destroy(extent_buffer_cache); > + kmem_cache_destroy_decoder(extent_buffer_cache, > +extent_buffer_leak_decoder); > } > > void extent_io_tree_init(struct extent_io_tree *tree, > diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c > index f359e4c..bccba3d 100644 > --- a/fs/btrfs/extent_map.c > +++ b/fs/btrfs/extent_map.c > @@ -16,6 +16,16 @@ static LIST_HEAD(emaps); > static DEFINE_SPINLOCK(map_leak_lock); > #endif > > +static void extent_map_leak_decoder(void *object) > +{ > + struct extent_map *em = object; > + > + printk(KERN_ERR "btrfs ext map leak: start %llu len %llu block %llu " > +"flags %lu refs %d in tree %d compress %d\n", > +em->start, em->len, em->block_start, em->flags, > +atomic_read(&em->refs), em->in_tree, (int)em->compress_type); > +} > + > int __init extent_map_init(void) > { > extent_map_cache = kmem_cache_create("btrfs_extent_map", > @@ -39,7 +49,8 @@ void extent_map_exit(void) > } > > if (extent_map_cache) > - kmem_cache_destroy(extent_map_cache); > + kmem_cache_destroy_decoder(extent_map_cache, > +extent_map_leak_decoder); > } > > /** > diff --git a/include/linux/slab.h b/include/linux/slab.h > index 5d168d7..5c6a8d8 100644 > --- a/include/linux/slab.h > +++ b/include/linux/slab.h > @@ -114,6 +114,7 @@ struct kmem_cache { > const char *name; /* Slab name for sysfs */ > int refcount; /* Use counter */ > void (*ctor)(void *); /* Called on object slot creation */ > + void (*decoder)(void *);/* Called on object slot leak detection */ > struct list_head list; /* List of all slab caches on the system */ > }; > #endif > @@ -132,6 +133,7 @@ struct kmem_cache * > kmem_cache_create_memcg(struct mem_cgroup *, const char *, size_t, size_t, > unsigned long, void (*)(void *), struct kmem_cache *); > void kmem_cache_destroy(struct kmem_cache *); > +void kmem_cache_destroy_decoder(struct 
kmem_cache *, void (*)(void *)); > int kmem_cache_shrink(struct kmem_cache *); > void kmem_cache_free(struct kmem_cache *, void *); > > diff --git a/include/linux/slab_def.h b/include/linux/slab_def.h > index 8bb6e0e..7ca8309 100644 > --- a/include/linux/slab_def.h > +++ b/include/linux/slab_def.h > @@ -48,6 +48,7 @@ struct kmem_cache { > > /* constructor func */ > void (*ctor)(void *o
[PATCH V2] mm/slab: add a leak decoder callback
This adds a leak decoder callback so that slab destruction can use to generate debugging output for the allocated objects. Callers like btrfs are using their own leak tracking which will manage allocated objects in a list(or something else), this does indeed the same thing as what slab does. So adding a callback for leak tracking can avoid this as well as runtime overhead. (The idea is from Zach Brown .) Signed-off-by: Liu Bo --- v2: add a wrapper API for slab destruction to make decoder only work in particular path. fs/btrfs/extent_io.c | 26 -- fs/btrfs/extent_map.c| 13 - include/linux/slab.h |2 ++ include/linux/slab_def.h |1 + include/linux/slub_def.h |1 + mm/slab_common.c | 17 - mm/slub.c|2 ++ 7 files changed, 58 insertions(+), 4 deletions(-) diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index bcc8dff..355c7fc 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -63,6 +63,26 @@ tree_fs_info(struct extent_io_tree *tree) return btrfs_sb(tree->mapping->host->i_sb); } +static void extent_state_leak_decoder(void *object) +{ + struct extent_state *state = object; + + printk(KERN_ERR "btrfs state leak: start %llu end %llu " + "state %lu in tree %p refs %d\n", + (unsigned long long)state->start, + (unsigned long long)state->end, + state->state, state->tree, atomic_read(&state->refs)); +} + +static void extent_buffer_leak_decoder(void *object) +{ + struct extent_buffer *eb = object; + + printk(KERN_ERR "btrfs buffer leak start %llu len %lu " + "refs %d\n", (unsigned long long)eb->start, + eb->len, atomic_read(&eb->refs)); +} + int __init extent_io_init(void) { extent_state_cache = kmem_cache_create("btrfs_extent_state", @@ -115,9 +135,11 @@ void extent_io_exit(void) */ rcu_barrier(); if (extent_state_cache) - kmem_cache_destroy(extent_state_cache); + kmem_cache_destroy_decoder(extent_state_cache, + extent_state_leak_decoder); if (extent_buffer_cache) - kmem_cache_destroy(extent_buffer_cache); + kmem_cache_destroy_decoder(extent_buffer_cache, + 
extent_buffer_leak_decoder); } void extent_io_tree_init(struct extent_io_tree *tree, diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c index f359e4c..bccba3d 100644 --- a/fs/btrfs/extent_map.c +++ b/fs/btrfs/extent_map.c @@ -16,6 +16,16 @@ static LIST_HEAD(emaps); static DEFINE_SPINLOCK(map_leak_lock); #endif +static void extent_map_leak_decoder(void *object) +{ + struct extent_map *em = object; + + printk(KERN_ERR "btrfs ext map leak: start %llu len %llu block %llu " + "flags %lu refs %d in tree %d compress %d\n", + em->start, em->len, em->block_start, em->flags, + atomic_read(&em->refs), em->in_tree, (int)em->compress_type); +} + int __init extent_map_init(void) { extent_map_cache = kmem_cache_create("btrfs_extent_map", @@ -39,7 +49,8 @@ void extent_map_exit(void) } if (extent_map_cache) - kmem_cache_destroy(extent_map_cache); + kmem_cache_destroy_decoder(extent_map_cache, + extent_map_leak_decoder); } /** diff --git a/include/linux/slab.h b/include/linux/slab.h index 5d168d7..5c6a8d8 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -114,6 +114,7 @@ struct kmem_cache { const char *name; /* Slab name for sysfs */ int refcount; /* Use counter */ void (*ctor)(void *); /* Called on object slot creation */ + void (*decoder)(void *);/* Called on object slot leak detection */ struct list_head list; /* List of all slab caches on the system */ }; #endif @@ -132,6 +133,7 @@ struct kmem_cache * kmem_cache_create_memcg(struct mem_cgroup *, const char *, size_t, size_t, unsigned long, void (*)(void *), struct kmem_cache *); void kmem_cache_destroy(struct kmem_cache *); +void kmem_cache_destroy_decoder(struct kmem_cache *, void (*)(void *)); int kmem_cache_shrink(struct kmem_cache *); void kmem_cache_free(struct kmem_cache *, void *); diff --git a/include/linux/slab_def.h b/include/linux/slab_def.h index 8bb6e0e..7ca8309 100644 --- a/include/linux/slab_def.h +++ b/include/linux/slab_def.h @@ -48,6 +48,7 @@ struct kmem_cache { /* constructor func */ void 
(*ctor)(void *obj); + void (*decoder)(void *obj); /* 4) cache creation/removal */ const char *name; diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h index 9db4825..fc18af7 100644 --- a/include/linux/slub_def.h +++ b/include/linux/slub_def.h @@ -93,6 +93,7 @@ struct kmem_cache {
Re: [PATCH] Btrfs: fix crash of starting balance
On Tue, Jan 15, 2013 at 06:59:04PM +0200, Ilya Dryomov wrote: > On Tue, Jan 15, 2013 at 10:47:57PM +0800, Liu Bo wrote: > > We will crash on BUG_ON(ret == -EEXIST) when we do not resume the existing > > balance but attempt to start a new one. > > > > The steps can be: > > 1. start balance > > 2. pause balance > > 3. start balance > > > > Signed-off-by: Liu Bo > > --- > > fs/btrfs/volumes.c |7 ++- > > 1 files changed, 6 insertions(+), 1 deletions(-) > > > > diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c > > index 5cce6aa..3901654 100644 > > --- a/fs/btrfs/volumes.c > > +++ b/fs/btrfs/volumes.c > > @@ -3100,7 +3100,12 @@ int btrfs_balance(struct btrfs_balance_control *bctl, > > goto out; > > > > if (!(bctl->flags & BTRFS_BALANCE_RESUME)) { > > - BUG_ON(ret == -EEXIST); > > + /* > > +* This can happen when we do not resume the existing balance > > +* but try to start a new one instead. > > +*/ > > + if (ret == -EEXIST) > > + goto out; > > set_balance_control(bctl); > > } else { > > BUG_ON(ret != -EEXIST); > > OK, it seems balance pause/resume logic got broken by dev-replace code > (5ac00addc7ac09110995fe967071d191b5981cc1), which went into v3.8-rc1. > This is most certainly not the right way to fix it, that BUG_ON is there > for a reason. I'll send a fix in a couple of days. Okay, right here waiting for test ;) thanks, liubo -- To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] mm/slab: add a leak decoder callback
On Tue, Jan 15, 2013 at 09:01:05AM -0800, Zach Brown wrote: > > The merge processing occurs during kmem_cache_create and you are setting > > up the decoder field afterwards! Wont work. > > In the thread I suggested providing the callback at destruction: > > http://www.mail-archive.com/linux-btrfs@vger.kernel.org/msg21130.html > > I liked that it limits accesibility of the callback to the only path > that uses it. Well, I was trying to avoid API change, but seems we have to, I'll update the patch as your suggestion in the next version. thanks, liubo -- To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] mm/slab: add a leak decoder callback
On Tue, Jan 15, 2013 at 04:30:52PM +, Christoph Lameter wrote: > On Mon, 14 Jan 2013, Liu Bo wrote: > > > This adds a leak decoder callback so that kmem_cache_destroy() > > can use to generate debugging output for the allocated objects. > > Interesting idea. > > > @@ -3787,6 +3789,9 @@ static int slab_unmergeable(struct kmem_cache *s) > > if (s->ctor) > > return 1; > > > > + if (s->decoder) > > + return 1; > > + > > /* > > * We may have set a slab to be unmergeable during bootstrap. > > */ > > The merge processing occurs during kmem_cache_create and you are setting > up the decoder field afterwards! Wont work. You're right, I miss the lock part. thanks, liubo -- To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: btrfs for files > 10GB = random spontaneous CRC failure.
On Tue, Jan 15, 2013 at 04:32:10PM -0700, Tom Kusmierz wrote: > Chris & all, > > Sorry for not replying for that long but Chris old friend "stress.sh" > have proven that all my storage is affected with this bug and first > thing was to bring everything down before corruptions will spread any > further. Anyway for subject sake btrfs stress have failed after 2h, ext4 > stress have failed after 8h (according to "time ./stress.sh blablabla" ) > - so it might be related to that ext4 always seamed slower on my machine > than btrfs. Ok, great. These problems are really hard to debug, and I'm glad we've nailed it down to the lower layers. > > > Anyway I wanted to use this opportunity to thank Chris and everybody > related to btrfs development - your file system found a hidden bug in my > set up that would be there until it would pretty much corrupt > everything. I don't even want to think how much my main storage got > corrupted over time (etx4 over lvm over md raid 5). > > p.s. bizzare that when I "fill" ext4 partition with test data everything > check's up OK (crc over all files), but with Chris tool it gets > corrupted - for both Adaptec crappy pcie controller and for mother board > built in one. One really hard part of tracking down corruptions is that our boxes have so much ram right now that they are often hidden by the page cache. My first advice is to boot with much less ram (1G/2G) or pin down all your ram for testing. A problem that triggers in 10 minutes is a billion times easier to figure out than one that triggers in 8 hours. > Also since courses of history proven that my testing > facilities are crap - any suggestion's on how can I test ram, cpu & > controller would be appreciated. Step one is to figure out if you've got a CPU/memory problem or an IO problem. memtest is often able to find CPU and memory problems, but if you pass memtest I like to use gcc for extra hard testing. 
If you have the ram, make a copy of the linux kernel tree in /dev/shm or any ramdisk/tmpfs mount. Then run make -j ; make clean in a loop until your box either crashes, gcc reports an internal compiler error, or 16 hours go by. Your loop will need to check for failed makes and stop once you get the first failure. Hopefully that will catch it. Otherwise we need to look at the IO stack. -chris -- To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: btrfs for files > 10GB = random spontaneous CRC failure.
On 14/01/13 16:34, Chris Mason wrote: On Mon, Jan 14, 2013 at 09:32:25AM -0700, Tomasz Kusmierz wrote: On 14/01/13 15:57, Chris Mason wrote: On Mon, Jan 14, 2013 at 08:22:36AM -0700, Tomasz Kusmierz wrote: On 14/01/13 14:59, Chris Mason wrote: On Mon, Jan 14, 2013 at 04:09:47AM -0700, Tomasz Kusmierz wrote: Hi, Since I had some free time over Christmas, I decided to conduct few tests over btrFS to se how it will cope with "real life storage" for normal "gray users" and I've found that filesystem will always mess up your files that are larger than 10GB. Hi Tom, I'd like to nail down the test case a little better. 1) Create on one drive, fill with data 2) Add a second drive, convert to raid1 3) find corruptions? What happens if you start with two drives in raid1? In other words, I'm trying to see if this is a problem with the conversion code. -chris Ok, my description might be a bit enigmatic so to cut long story short tests are: 1) create a single drive default btrfs volume on single partition -> fill with test data -> scrub -> admire errors. 2) create a raid1 (-d raid1 -m raid1) volume with two partitions on separate disk, each same size etc. -> fill with test data -> scrub -> admire errors. 3) create a raid10 (-d raid10 -m raid1) volume with four partitions on separate disk, each same size etc. -> fill with test data -> scrub -> admire errors. all disks are same age + size + model ... two different batches to avoid same time failure. Ok, so we have two possible causes. #1 btrfs is writing garbage to your disks. #2 something in your kernel is corrupting your data. Since you're able to see this 100% of the time, lets assume that if #2 were true, we'd be able to trigger it on other filesystems. So, I've attached an old friend, stress.sh. Use it like this: stress.sh -n 5 -c -s It will run in a loop with 5 parallel processes and make 5 copies of your data set into the destination. It will run forever until there are errors. 
You can use a higher process count (-n) to force more concurrency and use more ram. It may help to pin down all but 2 or 3 GB of your memory. What I'd like you to do is find a data set and command line that make the script find errors on btrfs. Then, try the same thing on xfs or ext4 and let it run at least twice as long. Then report back ;) -chris Chris, Will do, just please be remember that 2TB of test data on "customer grade" sata drives will take a while to test :) Many thanks. You might want to start with a smaller data set, 20GB or so total. -chris Chris & all, Sorry for not replying for that long but Chris old friend "stress.sh" have proven that all my storage is affected with this bug and first thing was to bring everything down before corruptions will spread any further. Anyway for subject sake btrfs stress have failed after 2h, ext4 stress have failed after 8h (according to "time ./stress.sh blablabla" ) - so it might be related to that ext4 always seamed slower on my machine than btrfs. Anyway I wanted to use this opportunity to thank Chris and everybody related to btrfs development - your file system found a hidden bug in my set up that would be there until it would pretty much corrupt everything. I don't even want to think how much my main storage got corrupted over time (etx4 over lvm over md raid 5). p.s. bizzare that when I "fill" ext4 partition with test data everything check's up OK (crc over all files), but with Chris tool it gets corrupted - for both Adaptec crappy pcie controller and for mother board built in one. Also since courses of history proven that my testing facilities are crap - any suggestion's on how can I test ram, cpu & controller would be appreciated. -- To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: Rendering a btrfs filesystem unmountable with the btrfs command
On Tue, Jan 15, 2013 at 12:44:36PM -0800, hop...@omnifarious.org wrote: > Help! BTW, I'm not a mailing list subscriber. Here is a sample fix that I believe will allow the offending filesystem to at least be mounted. It hasn't been tested at all. diff -ur kernel-3.7.fc17.orig/linux-3.7.2-201.fc17.x86_64/fs/btrfs/qgroup.c kernel-3.7.fc17/linux-3.7.2-201.fc17.x86_64/fs/btrfs/qgroup.c --- kernel-3.7.fc17.orig/linux-3.7.2-201.fc17.x86_64/fs/btrfs/qgroup.c 2012-12-10 19:30:57.0 -0800 +++ kernel-3.7.fc17/linux-3.7.2-201.fc17.x86_64/fs/btrfs/qgroup.c 2013-01-15 14:02:39.643630137 -0800 @@ -379,8 +379,13 @@ ret = add_relation_rb(fs_info, found_key.objectid, found_key.offset); - if (ret) + /* Failing to add a relation because one side or the other +* doesn't exist isn't a fatal error here. */ + if (ret) { + if (ret == -ENOENT) + ret = ENOENT; goto out; + } next2: ret = btrfs_next_item(quota_root, path); if (ret < 0) -- "They who can give up essential liberty to obtain a little temporary safety, deserve neither liberty nor safety." -- Benjanmin Franklin -- Eric Hopper (http://www.omnifarious.org/~hopper) -- pgpGWvt3YS3N_.pgp Description: PGP signature
Rendering a btrfs filesystem unmountable with the btrfs command
mkfs.btrfs /dev/sdb mkdir /tmp/mnt mount /dev/sdb /tmp/mnt cd /tmp/mnt btrfs quota enable . btrfs subvol create foo btrfs qgroup create 1/0 btrfs qgroup assign 0/257 1/0 btrfs subvol snapshot foo bar btrfs qgroup assign 0/258 1/0 cd .. umount /dev/sdb mount /dev/sdb /tmp/mnt # Still mountable! cd mnt btrfs qgroup destroy 1/0 cd .. umount /dev/sdb mount /dev/sdb /tmp/mnt # Oops, no longer mountable, even in recovery mode! Help! BTW, I'm not a mailing list subscriber. Thanks, -- "They who can give up essential liberty to obtain a little temporary safety, deserve neither liberty nor safety." -- Benjamin Franklin -- Eric Hopper (http://www.omnifarious.org/~hopper) -- pgpcx5l3K2qKS.pgp Description: PGP signature
Re: [PATCH 00/11 V3] add show command to the subvol sub command
Hi Anand On 01/15/2013 09:05 AM, Anand Jain wrote: > > Goffredo, > > Thanks for the review. > >> I expected also from test1 and test2 something. > > actually it is working as intended. which is > to show more info of any item under btrfs su list output, > and root itself won't be in the btrfs su list (unless > -a option is used), otherwise any suggestion what > is good to have for btrfs su show / ? If I understood correctly, btrfs su show is capable to lists the subvolume snapshots. It could be useful to list the snapshot of the root subvolume. > > Anand > > On 01/15/2013 02:25 AM, Goffredo Baroncelli wrote: >> On 01/14/2013 05:04 AM, Anand Jain wrote: >>> >>> >>> Any comments on this new sub-command, please. ? >>> >>> Thanks, Anand >>> >> I am trying to use this new command. Very nice. However I tried to use >> it against the root of filesystem, without success: >> >> The root of filesystem is under /var/btrfs; I used a subvolume as root: >> >> $ cat /proc/self/mountinfo | grep sdc3 >> 19 1 0:15 /__active / rw,noatime,nodiratime - btrfs /dev/sdc3 >> rw,space_cache >> 25 19 0:15 / /var/btrfs rw,noatime,nodiratime - btrfs /dev/sdc3 >> rw,space_cache >> >> >> If I do: >> >> $ #test 1 >> $ sudo ./btrfs su show / >> >> I got nothing >> >> If I do >> >> $ #test 2 >> $ sudo ./btrfs su show /var/btrfs/ >> >> still, I got nothing >> >> $ #test 3 >> $ sudo ./btrfs su show /var/btrfs/__active >> >> I, finally, got: >> >> /var/btrfs/__active >> uuid: 835c96b8-c066-554b-9230-1c531e831ff6 >> Parent uuid: - >> Creation time: - >> Object ID: 256 >> Generation (Gen): 75774 >> Gen at creation: 0 >> Parent: 5 >> Top Level: 5 >> Snapshot(s): >> >> >> I expected also from test1 and test2 something. 
>> >> BR >> G.Baroncelli >> >> >> >> >> > -- gpg @keyserver.linux.it: Goffredo Baroncelli (kreijackATinwind.it> Key fingerprint BBF5 1610 0B64 DAC6 5F7D 17B2 0EDA 9B37 8B82 E0B5 -- To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: Can moving data to a subvolume not take as long as a fully copy?
On Tue, Jan 15, 2013 at 8:49 AM, Marc MERLIN wrote: > On Mon, Jan 14, 2013 at 10:48:50PM -0800, David Brown wrote: >> Why not make a snapshot of the root volume, and then delete the files >> you want to move from the original root, and delete the rest of root >> from the snapshot? > > Are a snapshot of the root volume and a subvolume effectively the same thing > as far as btrfs sees them? > Once I have that snapshot which I'll treat as a subvolume, can I then > snapshot that snapshot/subvolume further? > Yes, the product of the btrfs snapshot command is a subvolume. -- To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] mm/slab: add a leak decoder callback
> The merge processing occurs during kmem_cache_create and you are setting > up the decoder field afterwards! Wont work. In the thread I suggested providing the callback at destruction: http://www.mail-archive.com/linux-btrfs@vger.kernel.org/msg21130.html I liked that it limits accessibility of the callback to the only path that uses it. - z -- To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] Btrfs: fix crash of starting balance
On Tue, Jan 15, 2013 at 10:47:57PM +0800, Liu Bo wrote: > We will crash on BUG_ON(ret == -EEXIST) when we do not resume the existing > balance but attempt to start a new one. > > The steps can be: > 1. start balance > 2. pause balance > 3. start balance > > Signed-off-by: Liu Bo > --- > fs/btrfs/volumes.c |7 ++- > 1 files changed, 6 insertions(+), 1 deletions(-) > > diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c > index 5cce6aa..3901654 100644 > --- a/fs/btrfs/volumes.c > +++ b/fs/btrfs/volumes.c > @@ -3100,7 +3100,12 @@ int btrfs_balance(struct btrfs_balance_control *bctl, > goto out; > > if (!(bctl->flags & BTRFS_BALANCE_RESUME)) { > - BUG_ON(ret == -EEXIST); > + /* > + * This can happen when we do not resume the existing balance > + * but try to start a new one instead. > + */ > + if (ret == -EEXIST) > + goto out; > set_balance_control(bctl); > } else { > BUG_ON(ret != -EEXIST); OK, it seems balance pause/resume logic got broken by dev-replace code (5ac00addc7ac09110995fe967071d191b5981cc1), which went into v3.8-rc1. This is most certainly not the right way to fix it, that BUG_ON is there for a reason. I'll send a fix in a couple of days. Thanks, Ilya > -- > 1.7.7.6 > > -- > To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in > the body of a message to majord...@vger.kernel.org > More majordomo info at http://vger.kernel.org/majordomo-info.html -- To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: btrfs for files > 10GB = random spontaneous CRC failure.
Hi, i had a similar scenario like Tomasz: - Started with single 3TB Disk. - Filled the 3TB Disk with a lot of files (more than 30 with 10-30GB) - Added 2x 1,5TB Disks - btrfs balance start dconvert=raid1 mconvert=raid1 $MOUNT - # btrfs scrub start $MOUNT - # btrfs scrub status $MOUNT scrub status for $ID scrub started at Tue Jan 15 07:10:15 2013 and finished after 24020 seconds total bytes scrubbed: 4.30TB with 0 errors so at least it is no general bug in btrfs - maybe this helps you... # uname -a Linux n40l 3.7.2 #1 SMP Sun Jan 13 11:46:56 CET 2013 x86_64 GNU/Linux # btrfs version Btrfs v0.20-rc1-37-g91d9ee Regards Lars Am 14.01.2013 17:34, schrieb Chris Mason: On Mon, Jan 14, 2013 at 09:32:25AM -0700, Tomasz Kusmierz wrote: On 14/01/13 15:57, Chris Mason wrote: On Mon, Jan 14, 2013 at 08:22:36AM -0700, Tomasz Kusmierz wrote: On 14/01/13 14:59, Chris Mason wrote: On Mon, Jan 14, 2013 at 04:09:47AM -0700, Tomasz Kusmierz wrote: Hi, Since I had some free time over Christmas, I decided to conduct few tests over btrFS to se how it will cope with "real life storage" for normal "gray users" and I've found that filesystem will always mess up your files that are larger than 10GB. Hi Tom, I'd like to nail down the test case a little better. 1) Create on one drive, fill with data 2) Add a second drive, convert to raid1 3) find corruptions? What happens if you start with two drives in raid1? In other words, I'm trying to see if this is a problem with the conversion code. -chris Ok, my description might be a bit enigmatic so to cut long story short tests are: 1) create a single drive default btrfs volume on single partition -> fill with test data -> scrub -> admire errors. 2) create a raid1 (-d raid1 -m raid1) volume with two partitions on separate disk, each same size etc. -> fill with test data -> scrub -> admire errors. 3) create a raid10 (-d raid10 -m raid1) volume with four partitions on separate disk, each same size etc. -> fill with test data -> scrub -> admire errors. 
all disks are same age + size + model ... two different batches to avoid same time failure. Ok, so we have two possible causes. #1 btrfs is writing garbage to your disks. #2 something in your kernel is corrupting your data. Since you're able to see this 100% of the time, lets assume that if #2 were true, we'd be able to trigger it on other filesystems. So, I've attached an old friend, stress.sh. Use it like this: stress.sh -n 5 -c -s It will run in a loop with 5 parallel processes and make 5 copies of your data set into the destination. It will run forever until there are errors. You can use a higher process count (-n) to force more concurrency and use more ram. It may help to pin down all but 2 or 3 GB of your memory. What I'd like you to do is find a data set and command line that make the script find errors on btrfs. Then, try the same thing on xfs or ext4 and let it run at least twice as long. Then report back ;) -chris Chris, Will do, just please be remember that 2TB of test data on "customer grade" sata drives will take a while to test :) Many thanks. You might want to start with a smaller data set, 20GB or so total. -chris -- To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html -- ADC-Ingenieurbüro Wiedemann | In der Borngasse 12 | 57520 Friedewald | Tel: 02743-930233 | Fax: 02743-930235 | www.adc-wiedemann.de GF: Dipl.-Ing. Hendrik Wiedemann | Umsatzsteuer-ID: DE 147979431 -- To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] mm/slab: add a leak decoder callback
On Mon, 14 Jan 2013, Liu Bo wrote: > This adds a leak decoder callback so that kmem_cache_destroy() > can use to generate debugging output for the allocated objects. Interesting idea. > @@ -3787,6 +3789,9 @@ static int slab_unmergeable(struct kmem_cache *s) > if (s->ctor) > return 1; > > + if (s->decoder) > + return 1; > + > /* >* We may have set a slab to be unmergeable during bootstrap. >*/ The merge processing occurs during kmem_cache_create and you are setting up the decoder field afterwards! Wont work. -- To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: abort transaction and RO
Simply mounting and umounting the device will now *always* crash the kernel. Logs of a 3.8-git debug kernel are below. I am not at all familiar with the btrfs code, but can't we simply abort the transaction and return -EIO instead of BUG_ON()? All those BUG_ON()s look scary... Having a failed filesystem is already bad, but crashing the entire system is not a good idea, IMHO. [ 182.265817] Btrfs loaded [ 182.267296] device fsid acb7d446-e3b7-4e1e-9ae3-6bbced79e115 devid 1 transid 154 /dev/vdd [ 182.269821] btrfs: disk space caching is enabled [ 193.530067] btrfs: csum mismatch on free space cache [ 193.530354] btrfs: failed to load free space cache for block group 29360128 [ 193.531829] leaf 29368320 total ptrs 49 free space 344 [ 193.532142] item 0 key (0 c0 4194304) itemoff 3971 itemsize 24 [ 193.532397] block group used 0 [ 193.532682] item 1 key (4194304 c0 8388608) itemoff 3947 itemsize 24 [ 193.532950] block group used 0 [ 193.533161] item 2 key (12582912 c0 8388608) itemoff 3923 itemsize 24 [ 193.533429] block group used 7917568 [ 193.533653] item 3 key (12636160 a8 53248) itemoff 3870 itemsize 53 [ 193.533914] extent refs 1 gen 153 flags 1 [ 193.534149] extent data backref root 5 objectid 18446744073709551604 offset 0 count 1 [ 193.534554] item 4 key (12689408 a8 262144) itemoff 3817 itemsize 53 [ 193.534818] extent refs 1 gen 153 flags 1 [ 193.535044] extent data backref root 1 objectid 256 offset 0 count 1 [ 193.535322] item 5 key (13213696 a8 262144) itemoff 3764 itemsize 53 [ 193.535608] extent refs 1 gen 150 flags 1 [ 193.535846] extent data backref root 1 objectid 273 offset 0 count 1 [ 193.536166] item 6 key (13475840 a8 262144) itemoff 3711 itemsize 53 [ 193.536435] extent refs 1 gen 148 flags 1 [ 193.536661] extent data backref root 1 objectid 259 offset 0 count 1 [ 193.536926] item 7 key (13737984 a8 262144) itemoff 3658 itemsize 53 [ 193.537193] extent refs 1 gen 148 flags 1 [ 193.537427] extent data backref root 1 objectid 260 offset 0 count 1 [ 
193.537698] item 8 key (14000128 a8 262144) itemoff 3605 itemsize 53 [ 193.537976] extent refs 1 gen 148 flags 1 [ 193.538210] extent data backref root 1 objectid 261 offset 0 count 1 [ 193.538479] item 9 key (14262272 a8 262144) itemoff 3552 itemsize 53 [ 193.538742] extent refs 1 gen 148 flags 1 [ 193.538971] extent data backref root 1 objectid 257 offset 0 count 1 [ 193.539238] item 10 key (14524416 a8 262144) itemoff 3499 itemsize 53 [ 193.539506] extent refs 1 gen 148 flags 1 [ 193.539733] extent data backref root 1 objectid 262 offset 0 count 1 [ 193.539996] item 11 key (14786560 a8 262144) itemoff 3446 itemsize 53 [ 193.540298] extent refs 1 gen 148 flags 1 [ 193.540527] extent data backref root 1 objectid 263 offset 0 count 1 [ 193.540795] item 12 key (15048704 a8 262144) itemoff 3393 itemsize 53 [ 193.541063] extent refs 1 gen 148 flags 1 [ 193.541297] extent data backref root 1 objectid 264 offset 0 count 1 [ 193.541564] item 13 key (15310848 a8 262144) itemoff 3340 itemsize 53 [ 193.541830] extent refs 1 gen 149 flags 1 [ 193.542063] extent data backref root 1 objectid 265 offset 0 count 1 [ 193.542330] item 14 key (15572992 a8 262144) itemoff 3287 itemsize 53 [ 193.542595] extent refs 1 gen 149 flags 1 [ 193.542824] extent data backref root 1 objectid 266 offset 0 count 1 [ 193.543085] item 15 key (15835136 a8 262144) itemoff 3234 itemsize 53 [ 193.543352] extent refs 1 gen 149 flags 1 [ 193.543577] extent data backref root 1 objectid 267 offset 0 count 1 [ 193.543844] item 16 key (16097280 a8 262144) itemoff 3181 itemsize 53 [ 193.544132] extent refs 1 gen 149 flags 1 [ 193.544364] extent data backref root 1 objectid 268 offset 0 count 1 [ 193.544628] item 17 key (16359424 a8 262144) itemoff 3128 itemsize 53 [ 193.544894] extent refs 1 gen 149 flags 1 [ 193.545120] extent data backref root 1 objectid 269 offset 0 count 1 [ 193.545387] item 18 key (16621568 a8 262144) itemoff 3075 itemsize 53 [ 193.545654] extent refs 1 gen 149 flags 1 [ 193.545883] 
extent data backref root 1 objectid 270 offset 0 count 1 [ 193.546152] item 19 key (16883712 a8 262144) itemoff 3022 itemsize 53 [ 193.546418] extent refs 1 gen 149 flags 1 [ 193.546646] extent data backref root 1 objectid 271 offset 0 count 1 [ 193.546903] item 20 key (17145856 a8 262144) itemoff 2969 itemsize 53 [ 193.547184] extent refs 1 gen 149 flags 1 [ 193.547413] extent data backref root 1 objectid 272 offset 0 count 1 [ 193.547703] item 21 key (17408000 a8 262144) itemoff 2916 itemsize 53 [ 193.547967] extent refs 1 gen 151 flags 1 [
[PATCH] Btrfs: fix crash of starting balance
We will crash on BUG_ON(ret == -EEXIST) when we do not resume the existing balance but attempt to start a new one. The steps can be: 1. start balance 2. pause balance 3. start balance Signed-off-by: Liu Bo --- fs/btrfs/volumes.c |7 ++- 1 files changed, 6 insertions(+), 1 deletions(-) diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 5cce6aa..3901654 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -3100,7 +3100,12 @@ int btrfs_balance(struct btrfs_balance_control *bctl, goto out; if (!(bctl->flags & BTRFS_BALANCE_RESUME)) { - BUG_ON(ret == -EEXIST); + /* +* This can happen when we do not resume the existing balance +* but try to start a new one instead. +*/ + if (ret == -EEXIST) + goto out; set_balance_control(bctl); } else { BUG_ON(ret != -EEXIST); -- 1.7.7.6 -- To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: Can moving data to a subvolume not take as long as a fully copy?
On Mon, Jan 14, 2013 at 10:48:50PM -0800, David Brown wrote: > Why not make a snapshot of the root volume, and then delete the files > you want to move from the original root, and delete the rest of root > from the snapshot? Are a snapshot of the root volume and a subvolume effectively the same thing as far as btrfs sees them? Once I have that snapshot which I'll treat as a subvolume, can I then snapshot that snapshot/subvolume further? If so, that's definitely a good way of doing this for next time. Thanks, Marc -- "A mouse is a device used to point at the xterm you want to type in" - A.S.R. Microsoft is to operating systems what McDonalds is to gourmet cooking Home page: http://marc.merlins.org/ -- To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: scrub question
On Tue, Jan 15, 2013 at 8:21 AM, Gene Czarcinski wrote: > When you start btrfs scrub and point at one subvolume, what is "scrubbed"? > > Just that subvolume or the entire volume? The entire volume. -- To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
scrub question
When you start btrfs scrub and point at one subvolume, what is "scrubbed"? Just that subvolume or the entire volume? Gene -- To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: abort transaction and RO
On 01/15/2013 02:35 PM, Bernd Schubert wrote: Hrmm, that bug then seems to cause another bug. After the file system went into RO, I simply umounted and mounted again and a few seconds after that my entire system failed. Relevant logs are attached. Further log attachment: btrfsck /dev/vg_fuj2/test >/tmp/btrfs.log 2>&1 checking extents ref mismatch on [12689408 262144] extent item 1, found 0 Incorrect local backref count on 12689408 root 1 owner 256 offset 0 found 0 wanted 1 back 0x1869ef0 backpointer mismatch on [12689408 262144] owner ref check failed [12689408 262144] ref mismatch on [12951552 262144] extent item 0, found 1 Backref 12951552 root 1 owner 256 offset 0 num_refs 0 not found in extent tree Incorrect local backref count on 12951552 root 1 owner 256 offset 0 found 1 wanted 0 back 0x185faa0 backpointer mismatch on [12951552 262144] ref mismatch on [13213696 262144] extent item 1, found 0 Incorrect local backref count on 13213696 root 1 owner 273 offset 0 found 0 wanted 1 back 0x1869fd0 backpointer mismatch on [13213696 262144] owner ref check failed [13213696 262144] ref mismatch on [13475840 262144] extent item 1, found 0 Incorrect local backref count on 13475840 root 1 owner 259 offset 0 found 0 wanted 1 back 0x186a0b0 backpointer mismatch on [13475840 262144] owner ref check failed [13475840 262144] ref mismatch on [13737984 262144] extent item 1, found 0 Incorrect local backref count on 13737984 root 1 owner 260 offset 0 found 0 wanted 1 back 0x186a190 backpointer mismatch on [13737984 262144] owner ref check failed [13737984 262144] ref mismatch on [14000128 262144] extent item 1, found 0 Incorrect local backref count on 14000128 root 1 owner 261 offset 0 found 0 wanted 1 back 0x186a270 backpointer mismatch on [14000128 262144] owner ref check failed [14000128 262144] ref mismatch on [14262272 262144] extent item 1, found 0 Incorrect local backref count on 14262272 root 1 owner 257 offset 0 found 0 wanted 1 back 0x186a350 backpointer mismatch on 
[14262272 262144] owner ref check failed [14262272 262144] ref mismatch on [14524416 262144] extent item 1, found 0 Incorrect local backref count on 14524416 root 1 owner 262 offset 0 found 0 wanted 1 back 0x186a430 backpointer mismatch on [14524416 262144] owner ref check failed [14524416 262144] ref mismatch on [14786560 262144] extent item 1, found 0 Incorrect local backref count on 14786560 root 1 owner 263 offset 0 found 0 wanted 1 back 0x186a510 backpointer mismatch on [14786560 262144] owner ref check failed [14786560 262144] ref mismatch on [15048704 262144] extent item 1, found 0 Incorrect local backref count on 15048704 root 1 owner 264 offset 0 found 0 wanted 1 back 0x186a5f0 backpointer mismatch on [15048704 262144] owner ref check failed [15048704 262144] ref mismatch on [15310848 262144] extent item 1, found 0 Incorrect local backref count on 15310848 root 1 owner 265 offset 0 found 0 wanted 1 back 0x186a6d0 backpointer mismatch on [15310848 262144] owner ref check failed [15310848 262144] ref mismatch on [15572992 262144] extent item 1, found 0 Incorrect local backref count on 15572992 root 1 owner 266 offset 0 found 0 wanted 1 back 0x186a7b0 backpointer mismatch on [15572992 262144] owner ref check failed [15572992 262144] ref mismatch on [15835136 262144] extent item 1, found 0 Incorrect local backref count on 15835136 root 1 owner 267 offset 0 found 0 wanted 1 back 0x186a890 backpointer mismatch on [15835136 262144] owner ref check failed [15835136 262144] ref mismatch on [16097280 262144] extent item 1, found 0 Incorrect local backref count on 16097280 root 1 owner 268 offset 0 found 0 wanted 1 back 0x186a970 backpointer mismatch on [16097280 262144] owner ref check failed [16097280 262144] ref mismatch on [16359424 262144] extent item 1, found 0 Incorrect local backref count on 16359424 root 1 owner 269 offset 0 found 0 wanted 1 back 0x186aa50 backpointer mismatch on [16359424 262144] owner ref check failed [16359424 262144] ref mismatch on 
[16621568 262144] extent item 1, found 0 Incorrect local backref count on 16621568 root 1 owner 270 offset 0 found 0 wanted 1 back 0x186ab30 backpointer mismatch on [16621568 262144] owner ref check failed [16621568 262144] ref mismatch on [16883712 262144] extent item 1, found 0 Incorrect local backref count on 16883712 root 1 owner 271 offset 0 found 0 wanted 1 back 0x186ac10 backpointer mismatch on [16883712 262144] owner ref check failed [16883712 262144] ref mismatch on [17145856 262144] extent item 1, found 0 Incorrect local backref count on 17145856 root 1 owner 272 offset 0 found 0 wanted 1 back 0x186acf0 backpointer mismatch on [17145856 262144] owner ref check failed [17145856 262144] ref mismatch on [17408000 262144] extent item 1, found 0 Incorrect local backref count on 17408000 root 1 owner 281 offset 0 found 0 wanted 1 back 0x186add0 backpointer mismatch on [17408000 262144] owner ref check failed [17408000 262144] ref mismatch on [17670144 262144] extent item 1, found 0
Re: [PATCH 2/3] btrfs-progs: libify some parts of btrfs-progs
On Tue, Jan 15, 2013 at 02:46:50PM +0200, Ilya Dryomov wrote: > On Mon, Jan 14, 2013 at 03:18:14PM +0100, Arvin Schnell wrote: > > > > Hi, > > > > please find attached a patch to make the new libbtrfs usable from > > C++ (at least for the parts snapper will likely need). > Why exactly do we need this in a non-header file? Oh, that's clearly a mistake. Thanks for noticing. Regards, Arvin -- Arvin Schnell, Senior Software Engineer, Research & Development SUSE LINUX Products GmbH, GF: Jeff Hawn, Jennifer Guild, Felix Imendörffer, HRB 16746 (AG Nürnberg) Maxfeldstraße 5 90409 Nürnberg Germany -- To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 2/3] btrfs-progs: libify some parts of btrfs-progs
On Mon, Jan 14, 2013 at 03:18:14PM +0100, Arvin Schnell wrote: > > Hi, > > please find attached a patch to make the new libbtrfs usable from > C++ (at least for the parts snapper will likely need). > > Regards, > Arvin > > -- > Arvin Schnell, > Senior Software Engineer, Research & Development > SUSE LINUX Products GmbH, GF: Jeff Hawn, Jennifer Guild, Felix Imendörffer, > HRB 16746 (AG Nürnberg) > Maxfeldstraße 5 > 90409 Nürnberg > Germany > diff --git a/cmds-send.c b/cmds-send.c > index 9b47e70..c51310a 100644 > --- a/cmds-send.c > +++ b/cmds-send.c > @@ -40,6 +40,10 @@ > #include "send.h" > #include "send-utils.h" > > +#ifdef __cplusplus > +extern "C" { > +#endif > + > static int g_verbose = 0; > > struct btrfs_send { > @@ -654,3 +658,7 @@ int cmd_send(int argc, char **argv) > { > return cmd_send_start(argc, argv); > } > + > +#ifdef __cplusplus > +} > +#endif Hi Arvin, Why exactly do we need this in a non-header file? Thanks, Ilya -- To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 2/3] btrfs-progs: libify some parts of btrfs-progs
Better if it's there (man libbtrfs). Please create it if you could. Thanks, Anand On 01/15/2013 02:10 AM, Mark Fasheh wrote: On Mon, Jan 14, 2013 at 11:43:44AM +0800, Anand Jain wrote: Mark, Good to create man libbtrfs ? Are you asking if you should do this? If so yeah for sure, I won't complain about sharing the work! If you're asking whether I should, I'm not sure. I suppose it's probably a good idea :) I'll have to look at other libfs manpages first to get an idea of what goes in there. At any rate, thanks :) --Mark -- Mark Fasheh -- To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html