[PATCH v5 04/11] btrfs: remove bio splitting and merge_bvec_fn() calls

2015-07-06 Thread Ming Lin
From: Kent Overstreet 

Btrfs has been doing bio splitting from btrfs_map_bio(), by checking
device limits as well as calling ->merge_bvec_fn() etc. That is not
necessary any more, because generic_make_request() is now able to
handle arbitrarily sized bios. So clean up unnecessary code paths.

Cc: Chris Mason 
Cc: Josef Bacik 
Cc: linux-btrfs@vger.kernel.org
Signed-off-by: Kent Overstreet 
Signed-off-by: Chris Mason 
[dpark: add more description in commit message]
Signed-off-by: Dongsu Park 
Signed-off-by: Ming Lin 
---
 fs/btrfs/volumes.c | 72 --
 1 file changed, 72 deletions(-)

diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 4b438b4..fd25b81 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -5833,34 +5833,6 @@ static noinline void btrfs_schedule_bio(struct btrfs_root *root,
 &device->work);
 }
 
-static int bio_size_ok(struct block_device *bdev, struct bio *bio,
-  sector_t sector)
-{
-   struct bio_vec *prev;
-   struct request_queue *q = bdev_get_queue(bdev);
-   unsigned int max_sectors = queue_max_sectors(q);
-   struct bvec_merge_data bvm = {
-   .bi_bdev = bdev,
-   .bi_sector = sector,
-   .bi_rw = bio->bi_rw,
-   };
-
-   if (WARN_ON(bio->bi_vcnt == 0))
-   return 1;
-
-   prev = &bio->bi_io_vec[bio->bi_vcnt - 1];
-   if (bio_sectors(bio) > max_sectors)
-   return 0;
-
-   if (!q->merge_bvec_fn)
-   return 1;
-
-   bvm.bi_size = bio->bi_iter.bi_size - prev->bv_len;
-   if (q->merge_bvec_fn(q, &bvm, prev) < prev->bv_len)
-   return 0;
-   return 1;
-}
-
 static void submit_stripe_bio(struct btrfs_root *root, struct btrfs_bio *bbio,
  struct bio *bio, u64 physical, int dev_nr,
  int rw, int async)
@@ -5894,38 +5866,6 @@ static void submit_stripe_bio(struct btrfs_root *root, struct btrfs_bio *bbio,
btrfsic_submit_bio(rw, bio);
 }
 
-static int breakup_stripe_bio(struct btrfs_root *root, struct btrfs_bio *bbio,
- struct bio *first_bio, struct btrfs_device *dev,
- int dev_nr, int rw, int async)
-{
-   struct bio_vec *bvec = first_bio->bi_io_vec;
-   struct bio *bio;
-   int nr_vecs = bio_get_nr_vecs(dev->bdev);
-   u64 physical = bbio->stripes[dev_nr].physical;
-
-again:
-   bio = btrfs_bio_alloc(dev->bdev, physical >> 9, nr_vecs, GFP_NOFS);
-   if (!bio)
-   return -ENOMEM;
-
-   while (bvec <= (first_bio->bi_io_vec + first_bio->bi_vcnt - 1)) {
-   if (bio_add_page(bio, bvec->bv_page, bvec->bv_len,
-bvec->bv_offset) < bvec->bv_len) {
-   u64 len = bio->bi_iter.bi_size;
-
-   atomic_inc(&bbio->stripes_pending);
-   submit_stripe_bio(root, bbio, bio, physical, dev_nr,
- rw, async);
-   physical += len;
-   goto again;
-   }
-   bvec++;
-   }
-
-   submit_stripe_bio(root, bbio, bio, physical, dev_nr, rw, async);
-   return 0;
-}
-
 static void bbio_error(struct btrfs_bio *bbio, struct bio *bio, u64 logical)
 {
atomic_inc(&bbio->error);
@@ -5998,18 +5938,6 @@ int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio,
continue;
}
 
-   /*
-* Check and see if we're ok with this bio based on it's size
-* and offset with the given device.
-*/
-   if (!bio_size_ok(dev->bdev, first_bio,
-bbio->stripes[dev_nr].physical >> 9)) {
-   ret = breakup_stripe_bio(root, bbio, first_bio, dev,
-dev_nr, rw, async_submit);
-   BUG_ON(ret);
-   continue;
-   }
-
if (dev_nr < total_devs - 1) {
bio = btrfs_bio_clone(first_bio, GFP_NOFS);
BUG_ON(!bio); /* -ENOMEM */
-- 
1.9.1



Re: [RFC PATCH V11 13/21] Btrfs: subpagesize-blocksize: Deal with partial ordered extent allocations.

2015-07-06 Thread Liu Bo
On Mon, Jun 01, 2015 at 08:52:48PM +0530, Chandan Rajendra wrote:
> In subpagesize-blocksize scenario, extent allocations for only some of the
> dirty blocks of a page can succeed, while allocation for rest of the blocks
> can fail. This patch allows I/O against such partially allocated ordered
> extents to be submitted.
> 
> Signed-off-by: Chandan Rajendra 
> ---
>  fs/btrfs/extent_io.c | 27 ++-
>  fs/btrfs/inode.c | 35 ++-
>  2 files changed, 36 insertions(+), 26 deletions(-)
> 
> diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
> index 0b017e1..0110abc 100644
> --- a/fs/btrfs/extent_io.c
> +++ b/fs/btrfs/extent_io.c
> @@ -1850,17 +1850,23 @@ int extent_clear_unlock_delalloc(struct inode *inode, 
> u64 start, u64 end,
>   if (page_ops & PAGE_SET_PRIVATE2)
>   SetPagePrivate2(pages[i]);
>  
> + if (page_ops & PAGE_SET_ERROR)
> + SetPageError(pages[i]);
> +
>   if (pages[i] == locked_page) {
>   page_cache_release(pages[i]);
>   continue;
>   }
> - if (page_ops & PAGE_CLEAR_DIRTY)
> +
> + if ((page_ops & PAGE_CLEAR_DIRTY)
> + && !PagePrivate2(pages[i]))
>   clear_page_dirty_for_io(pages[i]);
> - if (page_ops & PAGE_SET_WRITEBACK)
> + if ((page_ops & PAGE_SET_WRITEBACK)
> + && !PagePrivate2(pages[i]))
>   set_page_writeback(pages[i]);
> - if (page_ops & PAGE_SET_ERROR)
> - SetPageError(pages[i]);
> - if (page_ops & PAGE_END_WRITEBACK)
> +
> + if ((page_ops & PAGE_END_WRITEBACK)
> + && !PagePrivate2(pages[i]))
>   end_page_writeback(pages[i]);
>   if (page_ops & PAGE_UNLOCK)
>   unlock_page(pages[i]);
> @@ -2550,7 +2556,7 @@ int end_extent_writepage(struct page *page, int err, 
> u64 start, u64 end)
>   uptodate = 0;
>   }
>  
> - if (!uptodate) {
> + if (!uptodate || PageError(page)) {
>   ClearPageUptodate(page);
>   SetPageError(page);
>   ret = ret < 0 ? ret : -EIO;
> @@ -3340,7 +3346,6 @@ static noinline_for_stack int writepage_delalloc(struct 
> inode *inode,
>  nr_written);
>   /* File system has been set read-only */
>   if (ret) {
> - SetPageError(page);
>   /* fill_delalloc should be return < 0 for error
>* but just in case, we use > 0 here meaning the
>* IO is started, so we don't want to return > 0
> @@ -3561,7 +3566,6 @@ static int __extent_writepage(struct page *page, struct 
> writeback_control *wbc,
>   struct inode *inode = page->mapping->host;
>   struct extent_page_data *epd = data;
>   u64 start = page_offset(page);
> - u64 page_end = start + PAGE_CACHE_SIZE - 1;
>   int ret;
>   int nr = 0;
>   size_t pg_offset = 0;
> @@ -3606,7 +3610,7 @@ static int __extent_writepage(struct page *page, struct 
> writeback_control *wbc,
>   ret = writepage_delalloc(inode, page, wbc, epd, start, &nr_written);
>   if (ret == 1)
>   goto done_unlocked;
> - if (ret)
> + if (ret && !PagePrivate2(page))
>   goto done;
>  
>   ret = __extent_writepage_io(inode, page, wbc, epd,
> @@ -3620,10 +3624,7 @@ done:
>   set_page_writeback(page);
>   end_page_writeback(page);
>   }
> - if (PageError(page)) {
> - ret = ret < 0 ? ret : -EIO;
> - end_extent_writepage(page, ret, start, page_end);
> - }
> +
>   unlock_page(page);
>   return ret;
>  
> diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
> index 8b4aaed..bff60c6 100644
> --- a/fs/btrfs/inode.c
> +++ b/fs/btrfs/inode.c
> @@ -925,6 +925,8 @@ static noinline int cow_file_range(struct inode *inode,
>   struct btrfs_key ins;
>   struct extent_map *em;
>   struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
> + struct btrfs_ordered_extent *ordered;
> + unsigned long page_ops, extent_ops;
>   int ret = 0;
>  
>   if (btrfs_is_free_space_inode(inode)) {
> @@ -969,8 +971,6 @@ static noinline int cow_file_range(struct inode *inode,
>   btrfs_drop_extent_cache(inode, start, start + num_bytes - 1, 0);
>  
>   while (disk_num_bytes > 0) {
> - unsigned long op;
> -
>   cur_alloc_size = disk_num_bytes;
>   ret = btrfs_reserve_extent(root, cur_alloc_size,
>  root->sect

Re: [RFC PATCH V11 12/21] Btrfs: subpagesize-blocksize: Search for all ordered extents that could span across a page.

2015-07-06 Thread Chandan Rajendra
On Monday 06 Jul 2015 11:17:38 Liu Bo wrote:
> On Fri, Jul 03, 2015 at 03:38:00PM +0530, Chandan Rajendra wrote:
> > On Wednesday 01 Jul 2015 22:47:10 Liu Bo wrote:
> > > On Mon, Jun 01, 2015 at 08:52:47PM +0530, Chandan Rajendra wrote:
> > > > In subpagesize-blocksize scenario it is not sufficient to search using
> > > > the
> > > > first byte of the page to make sure that there are no ordered extents
> > > > present across the page. Fix this.
> > > > 
> > > > Signed-off-by: Chandan Rajendra 
> > > > ---
> > > > 
> > > >  fs/btrfs/extent_io.c | 3 ++-
> > > >  fs/btrfs/inode.c | 4 ++--
> > > >  2 files changed, 4 insertions(+), 3 deletions(-)
> > > > 
> > > > diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
> > > > index 14b4e05..0b017e1 100644
> > > > --- a/fs/btrfs/extent_io.c
> > > > +++ b/fs/btrfs/extent_io.c
> > > > @@ -3244,7 +3244,8 @@ static int __extent_read_full_page(struct
> > > > extent_io_tree *tree,>
> > > > 
> > > > while (1) {
> > > > 
> > > > lock_extent(tree, start, end);
> > > > 
> > > > -   ordered = btrfs_lookup_ordered_extent(inode, start);
> > > > +   ordered = btrfs_lookup_ordered_range(inode, start,
> > > > +   PAGE_CACHE_SIZE);
> > > 
> > > A minor suggestion, it'd be better to include the new prototype in the
> > > same patch, which will benefit later cherry-picking or reverting.
> > 
> > Liu, The definition of btrfs_lookup_ordered_range() is already part of
> > the mainline kernel.
> 
> Ah, I didn't recognize the difference of btrfs_lookup_ordered_extent and
> btrfs_lookup_ordered_range, sorry.
> 
> > > > if (!ordered)
> > > > 
> > > > break;
> > > > 
> > > > unlock_extent(tree, start, end);
> > > > 
> > > > diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
> > > > index e9bab73..8b4aaed 100644
> > > > --- a/fs/btrfs/inode.c
> > > > +++ b/fs/btrfs/inode.c
> > > > 
> > > > @@ -1976,7 +1976,7 @@ again:
> > > > if (PagePrivate2(page))
> > > > 
> > > > goto out;
> > > > 
> > > > -   ordered = btrfs_lookup_ordered_extent(inode, page_start);
> > > > +   ordered = btrfs_lookup_ordered_range(inode, page_start,
> > > > PAGE_CACHE_SIZE);
> > > > 
> > > > if (ordered) {
> > > > 
> > > > unlock_extent_cached(&BTRFS_I(inode)->io_tree, 
page_start,
> > > > 
> > > >  page_end, &cached_state, 
GFP_NOFS);
> > > > 
> > > > @@ -8513,7 +8513,7 @@ static void btrfs_invalidatepage(struct page
> > > > *page,
> > > > unsigned int offset,>
> > > > 
> > > > if (!inode_evicting)
> > > > 
> > > > lock_extent_bits(tree, page_start, page_end, 0,
> > 
> > &cached_state);
> > 
> > > > -   ordered = btrfs_lookup_ordered_extent(inode, page_start);
> > > > +   ordered = btrfs_lookup_ordered_range(inode, page_start,
> > > > PAGE_CACHE_SIZE);
> 
> It's possible for a page to hold two (or more) ordered extents here, a
> while loop is necessary to ensure that every ordered extent is processed
> properly.
>
Liu, Sorry, I had introduced the loop in the patch
"[RFC PATCH V11 14/21] Btrfs: subpagesize-blocksize: Explicitly Track I/O
status of blocks of an ordered extent". I will pull the loop into this patch for
the next version of that patchset.

> Thanks,
> 
> -liubo
> 
> > > > if (ordered) {
> > > > 
> > > > /*
> > > > 
> > > >  * IO on this page will never be started, so we need

-- 
chandan



Re: Any hope of pool recovery?

2015-07-06 Thread Austin S Hemmelgarn

On 2015-07-03 13:51, Chris Murphy wrote:

On Fri, Jul 3, 2015 at 9:05 AM, Donald Pearson
 wrote:


I did some more digging and found that I had a lot of errors on basically
every drive.


Ick. Sucks for you but then makes this less of a Btrfs problem because
it can really only do so much if more than the number of spares have
problems. It does suggest a more aggressive need for the volume to go
read only in such cases though, before it gets this corrupt.
I'd almost say this is something that should be configurable.  The
default should probably be that if there have been errors on at least as
many drives as there are spares, the fs goes read-only; but still
provide the option to choose between that, going read-only immediately
on the first error, or going read-only only on write errors.

Multiple disk problems like this though suggest a shared hardware
problem like a controller or expander.

I have to agree with this statement; I've seen stuff like this before
(although thankfully not on BTRFS), and 100% of the time the root cause
was either the storage controller or system RAM.






[PATCH v2] Btrfs-progs: add feature to get minimum size for resizing a fs/device

2015-07-06 Thread fdmanana
From: Filipe Manana 

Currently there is no way for a user to know what is the minimum size a
device of a btrfs filesystem can be resized to. Sometimes the value of
total allocated space (sum of all allocated chunks/device extents), which
can be parsed from 'btrfs filesystem show' and 'btrfs filesystem usage',
works as the minimum size, but sometimes it does not, namely when device
extents have to be relocated to holes (unallocated space) within the new
size of the device (the total allocated space sum).

This change adds the ability to reliably compute such a minimum value and
extends 'btrfs filesystem resize' with the following syntax to get that
value:

   btrfs filesystem resize [devid:]get_min_size
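
For example, a hypothetical invocation (the devid and mount point below are
only placeholders) would be:

   btrfs filesystem resize 1:get_min_size /mnt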

Signed-off-by: Filipe Manana 
---

V2: Check if device holes contain the location of superblock mirrors and
correct the minimum size accounting accordingly.
Added missing sudo calls to the test, rebased against the development
branch and moved it into the misc-tests category.

 Documentation/btrfs-filesystem.asciidoc |   4 +-
 cmds-filesystem.c   | 219 +++-
 ctree.h |   3 +
 tests/misc-tests.sh |   2 +
 tests/misc-tests/004-shrink-fs/test.sh  |  69 ++
 5 files changed, 295 insertions(+), 2 deletions(-)
 create mode 100755 tests/misc-tests/004-shrink-fs/test.sh

diff --git a/Documentation/btrfs-filesystem.asciidoc b/Documentation/btrfs-filesystem.asciidoc
index 31cd51b..2b34242 100644
--- a/Documentation/btrfs-filesystem.asciidoc
+++ b/Documentation/btrfs-filesystem.asciidoc
@@ -93,7 +93,7 @@ If a newlabel optional argument is passed, the label is changed.
 NOTE: the maximum allowable length shall be less than 256 chars
 
 // Some wording are extracted by the resize2fs man page
-*resize* [:][+/-][kKmMgGtTpPeE]|[:]max ::
+*resize* [:][+/-][kKmMgGtTpPeE]|[:]max|[:]get_min_size ::
 Resize a mounted filesystem identified by directory . A particular device
 can be resized by specifying a .
 +
@@ -113,6 +113,8 @@ KiB, MiB, GiB, TiB, PiB, or EiB, respectively. Case does not matter.
 +
 If \'max' is passed, the filesystem will occupy all available space on the
 device devid.
+If \'get_min_size' is passed, return the minimum size the device can be
+shrunk to, without performing any resize operation.
 +
 The resize command does not manipulate the size of underlying
 partition.  If you wish to enlarge/reduce a filesystem, you must make sure you
diff --git a/cmds-filesystem.c b/cmds-filesystem.c
index 800aa4d..115c82a 100644
--- a/cmds-filesystem.c
+++ b/cmds-filesystem.c
@@ -1271,14 +1271,228 @@ static int cmd_defrag(int argc, char **argv)
 }
 
 static const char * const cmd_resize_usage[] = {
-   "btrfs filesystem resize 
[devid:][+/-][kKmMgGtTpPeE]|[devid:]max ",
+   "btrfs filesystem resize 
[devid:][+/-][kKmMgGtTpPeE]|[devid:]max|[devid:]get_min_size ",
"Resize a filesystem",
"If 'max' is passed, the filesystem will occupy all available space",
"on the device 'devid'.",
+   "If 'get_min_size' is passed, return the minimum size the device can",
+   "be shrunk to.",
"[kK] means KiB, which denotes 1KiB = 1024B, 1MiB = 1024KiB, etc.",
NULL
 };
 
+struct dev_extent_elem {
+   u64 start;
+   /* inclusive end */
+   u64 end;
+   struct list_head list;
+};
+
+static int add_dev_extent(struct list_head *list,
+ const u64 start, const u64 end,
+ const int append)
+{
+   struct dev_extent_elem *e;
+
+   e = malloc(sizeof(*e));
+   if (!e)
+   return -ENOMEM;
+
+   e->start = start;
+   e->end = end;
+
+   if (append)
+   list_add_tail(&e->list, list);
+   else
+   list_add(&e->list, list);
+
+   return 0;
+}
+
+static void free_dev_extent_list(struct list_head *list)
+{
+   while (!list_empty(list)) {
+   struct dev_extent_elem *e;
+
+   e = list_first_entry(list, struct dev_extent_elem, list);
+   list_del(&e->list);
+   free(e);
+   }
+}
+
+static void adjust_dev_min_size(struct list_head *extents,
+   struct list_head *holes,
+   u64 *min_size)
+{
+   /*
+* If relocation of the block group of a device extent must happen (see
+* below) scratch space is used for the relocation. So track here the
+* size of the largest device extent that has to be relocated. We track
+* only the largest and not the sum of the sizes of all relocated block
+* groups because after each block group is relocated the running
+* transaction is committed so that pinned space is released.
+*/
+   u64 scratch_space = 0;
+
+   /*
+* List of device extents is sorted by descending order of the extent's
+* end offset. If some extent goes beyond the computed minimum s

[PATCH trivial] Btrfs: Spelling s/consitent/consistent/

2015-07-06 Thread Geert Uytterhoeven
Signed-off-by: Geert Uytterhoeven 
---
 fs/btrfs/qgroup.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
index d5f1f033b7a00f3c..bf3c3fbed4b691f7 100644
--- a/fs/btrfs/qgroup.c
+++ b/fs/btrfs/qgroup.c
@@ -376,7 +376,7 @@ int btrfs_read_qgroup_config(struct btrfs_fs_info *fs_info)
qgroup = find_qgroup_rb(fs_info, found_key.offset);
if ((qgroup && found_key.type == BTRFS_QGROUP_INFO_KEY) ||
(!qgroup && found_key.type == BTRFS_QGROUP_LIMIT_KEY)) {
-   btrfs_err(fs_info, "inconsitent qgroup config");
+   btrfs_err(fs_info, "inconsistent qgroup config");
flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
}
if (!qgroup) {
-- 
1.9.1



Btrfs - distribute files equally across multiple devices

2015-07-06 Thread Johannes Pfrang
Cross-posting my unix.stackexchange.com question[1] to the btrfs list
(slightly modified):

[1]
https://unix.stackexchange.com/questions/214009/btrfs-distribute-files-equally-across-multiple-devices

-

I have a btrfs volume across two devices that has metadata RAID 1 and
data RAID 0. AFAIK, in the event one drive would fail, practically all
files above the 64KB default stripe size would be corrupted. As this
partition isn't performance critical, but should be space-efficient,
I've thought about re-balancing the filesystem to distribute files
equally across disks, but something like that doesn't seem to exist. The
ultimate goal would be to be able to still read some of the files in the
event of a drive failure.

AFAIK, using "single"/linear data allocation just fills up drives one by
one (at least that's what the wiki says).

Simple example (according to my best knowledge):

Write two 128KB files (file0, file1) to two devices (dev0, dev1):

RAID0:

file0/chunk0 (64KB): dev0
file0/chunk1 (64KB): dev1
file1/chunk0 (64KB): dev0
file1/chunk1 (64KB): dev1

Linear:

file0 (128KB): dev0
file1 (128KB): dev0

distribute files:

file0 (128KB): dev0
file1 (128KB): dev1

The simplest implementation would probably be something like: Always
write files to the disk with the least amount of space used. I think
this may be a valid software-raid use-case, as it combines RAID 0 (w/o
some of the performance gains[2]) with recoverability of about half of
the data/files (balanced by filled space or amount of files) in the
event of a drive-failure[3] by using filesystem information a
hardware-raid doesn't have. In the end this is more or less JBOD with
balanced disk usage + filesystem intelligence.
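
To make the proposed policy concrete, here is a minimal stand-alone C sketch
(device names, sizes and file sizes are made up) that reproduces the
"distribute files" example above: each new file goes to the device with the
least used space that can still hold it.

/* Not btrfs code: a minimal user-space sketch of the proposed policy.
 * Device names/sizes and file sizes below are made up. */
#include <stdio.h>
#include <stdint.h>

struct dev {
	const char *name;
	uint64_t size;		/* capacity in bytes */
	uint64_t used;		/* bytes already written */
};

/* Pick the device with the least used space that can still hold the file,
 * or return -1 if nothing fits. */
static int pick_device(struct dev *devs, int ndevs, uint64_t file_size)
{
	int best = -1;

	for (int i = 0; i < ndevs; i++) {
		if (devs[i].size - devs[i].used < file_size)
			continue;
		if (best < 0 || devs[i].used < devs[best].used)
			best = i;
	}
	return best;
}

int main(void)
{
	struct dev devs[] = {
		{ "dev0", 1024 * 1024, 0 },
		{ "dev1", 1024 * 1024, 0 },
	};
	uint64_t file_size[] = { 128 * 1024, 128 * 1024 };	/* file0, file1 */

	for (unsigned int i = 0; i < 2; i++) {
		int d = pick_device(devs, 2, file_size[i]);

		if (d < 0) {
			printf("file%u: no device has room\n", i);
			continue;
		}
		devs[d].used += file_size[i];
		printf("file%u (%lluKB): %s\n", i,
		       (unsigned long long)file_size[i] / 1024, devs[d].name);
	}
	return 0;
}

Running it prints file0 landing on dev0 and file1 on dev1, i.e. the
"distribute files" layout from the example.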

Is there something like that already in btrfs or could this be something
the btrfs-devs would consider?


[2] Still can read/write multiple files from/to different disks, so less
performance only for "single-file-reads/writes"
[3] using two disks, otherwise (totalDisks-failedDisks)/totalDisks


Re: Btrfs - distribute files equally across multiple devices

2015-07-06 Thread Roman Mamedov
On Mon, 6 Jul 2015 18:22:52 +0200
Johannes Pfrang  wrote:

> The simplest implementation would probably be something like: Always
> write files to the disk with the least amount of space used. I think
> this may be a valid software-raid use-case, as it combines RAID 0 (w/o
> some of the performance gains[2]) with recoverability of about half of
> the data/files (balanced by filled space or amount of files) in the
> event of a drive-failure[3] by using filesystem information a
> hardware-raid doesn't have. In the end this is more or less JBOD with
> balanced disk usage + filesystem intelligence.

mhddfs does exactly that: https://romanrm.net/mhddfs

-- 
With respect,
Roman




Re: [PATCH v2] Btrfs: fix memory leak in the extent_same ioctl

2015-07-06 Thread Mark Fasheh
Thanks for this Filipe,

On Fri, Jul 03, 2015 at 11:36:49AM +0100, fdman...@kernel.org wrote:
> From: Filipe Manana 
> 
> We were allocating memory with memdup_user() but we were never releasing
> that memory. This affected pretty much every call to the ioctl, whether
> it deduplicated extents or not.
> 
> This issue was reported on IRC by Julian Taylor and on the mailing list
> by Marcel Ritter, credit goes to them for finding the issue.
> 
> Reported-by: Julian Taylor 
> Reported-by: Marcel Ritter 
> Cc: sta...@vger.kernel.org
> Signed-off-by: Filipe Manana 

Reviewed-by: Mark Fasheh 
--Mark

--
Mark Fasheh


Re: Btrfs - distribute files equally across multiple devices

2015-07-06 Thread Johannes Pfrang
That looks quite interesting!
Unfortunately this removes the ability to specify different RAID-levels
for metadata vs data and actually behaves more like btrfs "single" mode.
According to your link it fills drive by drive instead of distributing
files equally across them:

"When you create a new file in the virtual filesystem, |mhddfs| will
look at the free space, which remains on each of the drives. If the
first drive has enough free space, the file will be created on that
first drive."

What I propose (simplest implementation):

"When you create a new file in the filesystem, btrfswill look at used
space on each of the drives. The file will be created on the drive with
the least used space that can hold the file."

Difference:

mhddfs only achieves maximum recoverability once the filesystem is full
(just like "single"), while my proposal achieves such recoverability
from the start (maximum recoverability being
(totalDisks-failedDisks)/totalDisks as the percentage of recoverable
data/files, depending on what the fs is balanced by).

Also, I'm not sure whether it's compatible with btrfs's special
remaining-space-calculation magic ^^

On 06.07.2015 18:45, Roman Mamedov wrote:
> On Mon, 6 Jul 2015 18:22:52 +0200
> Johannes Pfrang  wrote:
>
>> The simplest implementation would probably be something like: Always
>> write files to the disk with the least amount of space used. I think
>> this may be a valid software-raid use-case, as it combines RAID 0 (w/o
>> some of the performance gains[2]) with recoverability of about half of
>> the data/files (balanced by filled space or amount of files) in the
>> event of a drive-failure[3] by using filesystem information a
>> hardware-raid doesn't have. In the end this is more or less JBOD with
>> balanced disk usage + filesystem intelligence.
> mhddfs does exactly that: https://romanrm.net/mhddfs
>






Re: Btrfs - distribute files equally across multiple devices

2015-07-06 Thread Hugo Mills
On Mon, Jul 06, 2015 at 06:22:52PM +0200, Johannes Pfrang wrote:
> Cross-posting my unix.stackexchange.com question[1] to the btrfs list
> (slightly modified):
> 
> [1]
> https://unix.stackexchange.com/questions/214009/btrfs-distribute-files-equally-across-multiple-devices
> 
> -
> 
> I have a btrfs volume across two devices that has metadata RAID 1 and
> data RAID 0. AFAIK, in the event one drive would fail, practically all
> files above the 64KB default stripe size would be corrupted. As this
> partition isn't performance critical, but should be space-efficient,
> I've thought about re-balancing the filesystem to distribute files
> equally across disks, but something like that doesn't seem to exist. The
> ultimate goal would be to be able to still read some of the files in the
> event of a drive failure.
> 
> AFAIK, using "single"/linear data allocation just fills up drives one by
> one (at least that's what the wiki says).

   Not quite. In single mode, the FS will allocate linear chunks of
space 1 GiB in size, and use those to write into (fitting many files
into each chunk, potentially). The chunks are allocated as needed, and
will go on the device with the most unallocated space.

   So, with equal-sized devices, the first 1 GiB will go on the first
device, the second 1 GiB on the second device, and so on.

   With unequal devices, you'll put data on the largest device, until
its free space reaches the size of the next largest, and then the
chunks will be alternated between those two, until the free space on
each of the two largest reaches the size of the third-largest, and so
on.

   (e.g. for devices sized 6 TB, 4 TB, 3 TB, the first 2 TB will go
exclusively on the first device; the next 2 TB will go on the first
two devices, alternating in 1 GB chunks; the rest goes across all
three devices, again, alternating in 1 GB chunks.)
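
   To illustrate, a toy user-space simulation of that rule (not the real
allocator; sizes rounded to 6144/4096/3072 GiB and 5 TiB of written data
are assumptions) that hands out 1 GiB chunks to whichever device currently
has the most unallocated space:

/* Toy model of the chunk allocation rule described above: each 1 GiB data
 * chunk goes to the device with the most unallocated space.  Stand-alone
 * illustration only, not the real btrfs allocator. */
#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint64_t size[3] = { 6144, 4096, 3072 };	/* GiB: ~6 TB, 4 TB, 3 TB */
	uint64_t used[3] = { 0, 0, 0 };
	uint64_t data = 5120;				/* write 5 TiB of data */

	for (uint64_t chunk = 0; chunk < data; chunk++) {
		int best = 0;

		for (int i = 1; i < 3; i++)
			if (size[i] - used[i] > size[best] - used[best])
				best = i;
		used[best]++;		/* one more 1 GiB chunk lands here */
	}
	for (int i = 0; i < 3; i++)
		printf("dev%d: %llu GiB used of %llu GiB\n", i,
		       (unsigned long long)used[i],
		       (unsigned long long)size[i]);
	return 0;
}

The output shows the pattern described above: the first ~2 TiB go only to
the largest device, the next ~2 TiB alternate between the two largest, and
the remainder is spread across all three.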

   This is all very well for an append-only filesystem, but if you're
changing the files on the FS at all, there's no guarantee as to where
the changed extents will end up -- not even on the same device, let
alone close to the rest of the file on the platter.

   I did work out, some time ago, a prototype chunk allocator (the 1
GiB-scale allocations) that would allow enough flexibility to control
where the next chunk to be allocated would go. However, that still
leaves the extent allocator to deal with, which is the second, and
much harder, part of the problem.

   Basically, don't assume any kind of structure to the location of
your data on the devices you have, and keep good, tested, regular
backups of anything you can't stand to lose and can't replace. There
are no guarantees that would let you assume easily that any one file
is on a single device, or that anything would survive the loss of a
device.

   I'm sure this is an FAQ entry somewhere... It's come up enough
times.

   Hugo.

> The simplest implementation would probably be something like: Always
> write files to the disk with the least amount of space used. I think
> this may be a valid software-raid use-case, as it combines RAID 0 (w/o
> some of the performance gains[2]) with recoverability of about half of
> the data/files (balanced by filled space or amount of files) in the
> event of a drive-failure[3] by using filesystem information a
> hardware-raid doesn't have. In the end this is more or less JBOD with
> balanced disk usage + filesystem intelligence.
> 
> Is there something like that already in btrfs or could this be something
> the btrfs-devs would consider?
> 
> 
> [2] Still can read/write multiple files from/to different disks, so less
> performance only for "single-file-reads/writes"
> [3] using two disks, otherwise (totalDisks-failedDisks)/totalDisks

-- 
Hugo Mills | "How deep will this sub go?"
hugo@... carfax.org.uk | "Oh, she'll go all the way to the bottom if we don't
http://carfax.org.uk/  | stop her."
PGP: E2AB1DE4  |  U571




Re: Btrfs - distribute files equally across multiple devices

2015-07-06 Thread Johannes Pfrang
Thank you. That's a very helpful explanation. I just did a balance
start -dconvert=single ;)
Fwiw, the best explanation about "single" I could find was in the
Glossary[1].
I don't have an account on the wiki, but your first paragraph would fit
great there!


[1] https://btrfs.wiki.kernel.org/index.php/Glossary


On 06.07.2015 19:53, Hugo Mills wrote:
> On Mon, Jul 06, 2015 at 06:22:52PM +0200, Johannes Pfrang wrote:
>Not quite. In single mode, the FS will allocate linear chunks of
> space 1 GiB in size, and use those to write into (fitting many files
> into each chunk, potentially). The chunks are allocated as needed, and
> will go on the device with the most unallocated space.
>
>So, with equal-sized devices, the first 1 GiB will go on the first
> device, the second 1 GiB on the second device, and so on.
>
>With unequal devices, you'll put data on the largest device, until
> its free space reaches the size of the next largest, and then the
> chunks will be alternated between those two, until the free space on
> each of the two largest reaches the size of the third-largest, and so
> on.
>
>(e.g. for devices sized 6 TB, 4 TB, 3 TB, the first 2 TB will go
> exclusively on the first device; the next 2 TB will go on the first
> two devices, alternating in 1 GB chunks; the rest goes across all
> three devices, again, alternating in 1 GB chunks.)
>
>This is all very well for an append-only filesystem, but if you're
> changing the files on the FS at all, there's no guarantee as to where
> the changed extents will end up -- not even on the same device, let
> alone close to the rest of the file on the platter.
>
>I did work out, some time ago, a prototype chunk allocator (the 1
> GiB-scale allocations) that would allow enough flexibility to control
> where the next chunk to be allocated would go. However, that still
> leaves the extent allocator to deal with, which is the second, and
> much harder, part of the problem.
>
>Basically, don't assume any kind of structure to the location of
> your data on the devices you have, and keep good, tested, regular
> backups of anything you can't stand to lose and can't replace. There
> are no guarantees that would let you assume easily that any one file
> is on a single device, or that anything would survive the loss of a
> device.
I promise I won't assume that.

Two 4TB data disks:
- 3TiB+3TiB data=single,meta=raid1 replaceable/unimportant
- 654GiB|654GiB data/meta=raid1 important with regular backups

efficient + safe enough (for my use-case)
>
>I'm sure this is an FAQ entry somewhere... It's come up enough
> times.
>
>Hugo.
>
>





Re: strange corruptions found during btrfs check

2015-07-06 Thread Christoph Anton Mitterer
After removing some of the snapshots that were received, the errors at
btrfs check went away.

Is there some list of features in btrfs which are considered stable?
Because I thought send/receive and the subvolumes would be, but apparently
this doesn't seem to be the case :-/


Cheers,
Chris.




size 2.73TiB used 240.97GiB after balance

2015-07-06 Thread Hendrik Friedel

Hello,

I started with a raid1:
devid1 size 2.73TiB used 2.67TiB path /dev/sdd
devid2 size 2.73TiB used 2.67TiB path /dev/sdb
Then I added a third device, /dev/sdc1, and ran a balance:
btrfs balance start -dconvert=raid5 -mconvert=raid5 /mnt/__Complete_Disk/

Now the file-system looks like this:
Total devices 3 FS bytes used 4.68TiB
devid1 size 2.73TiB used 2.67TiB path /dev/sdd
devid2 size 2.73TiB used 2.67TiB path /dev/sdb
devid3 size 2.73TiB used 240.97GiB path /dev/sdc1

I am surprised by the 240.97GiB...

In the syslog and dmesg I find several:
[108274.415499] btrfs_dev_stat_print_on_error: 8 callbacks suppressed
[108279.840334] btrfs_dev_stat_print_on_error: 12 callbacks suppressed

What's wrong here?

Regards,
Hendrik




Re: size 2.73TiB used 240.97GiB after balance

2015-07-06 Thread Hendrik Friedel

Hello,

OK, sdc seems to have failed (sorry, I checked only the sdd and sdb SMART
values, as sdc is brand new; maybe a bad assumption on my side).


I have mounted the device
mount -o recovery,ro

So, what should I do now:
btrfs device delete /dev/sdc /mnt

or

mount -o degraded /dev/sdb /mnt
btrfs device delete missing /mnt

I do have a backup of the most valuable data.
But if you consider one of the above options risky, I'd better get a
new drive first, but that might take a couple of days (in which sdc
could degrade further).

What is your recommendation?


Regards,
Hendrik




Re: size 2.73TiB used 240.97GiB after balance

2015-07-06 Thread Hugo Mills
On Mon, Jul 06, 2015 at 09:44:53PM +0200, Hendrik Friedel wrote:
> Hello,
> 
> ok, sdc seems to have failed (sorry, I checked only sdd and sdb
> SMART values, as sdc is brand new. Maybe a bad assumption, from my
> side.
> 
> I have mounted the device
> mount -o recovery,ro
> 
> So, what should I do now:
> btrfs device delete /dev/sdc /mnt
> 
> or
> 
> mount -o degraded /dev/sdb /mnt
> btrfs device delete missing /mnt
> 
> I do have a backup of the most valuable data.
> But if you consider one of the above options risky, I might better
> get a new drive before, but this might take a couple of days (in
> which sdc could further degrade).
> What is your recommendation?

   Physically remove the device from the array, mount with -o
degraded, optionally add the new device, and run a balance.

   Hugo.

-- 
Hugo Mills | "I lost my leg in 1942. Some bastard stole it in a
hugo@... carfax.org.uk | pub in Pimlico."
http://carfax.org.uk/  |
PGP: E2AB1DE4  |




Re: size 2.73TiB used 240.97GiB after balance

2015-07-06 Thread Donald Pearson
Based on my experience Hugo's advice is critical, get the bad drive
out of the pool when in raid56 and do not try to replace or delete it
while it's still attached and recognized.

If you add a new device, mount degraded and rebalance.  If you don't,
mount degraded then device delete missing.

On Mon, Jul 6, 2015 at 2:49 PM, Hugo Mills  wrote:
> On Mon, Jul 06, 2015 at 09:44:53PM +0200, Hendrik Friedel wrote:
>> Hello,
>>
>> ok, sdc seems to have failed (sorry, I checked only sdd and sdb
>> SMART values, as sdc is brand new. Maybe a bad assumption, from my
>> side.
>>
>> I have mounted the device
>> mount -o recovery,ro
>>
>> So, what should I do now:
>> btrfs device delete /dev/sdc /mnt
>>
>> or
>>
>> mount -o degraded /dev/sdb /mnt
>> btrfs device delete missing /mnt
>>
>> I do have a backup of the most valuable data.
>> But if you consider one of the above options risky, I might better
>> get a new drive before, but this might take a couple of days (in
>> which sdc could further degrade).
>> What is your recommendation?
>
>Physically remove the device from the array, mount with -o
> degraded, optionally add the new device, and run a balance.
>
>Hugo.
>
> --
> Hugo Mills | "I lost my leg in 1942. Some bastard stole it in a
> hugo@... carfax.org.uk | pub in Pimlico."
> http://carfax.org.uk/  |
> PGP: E2AB1DE4  |


Re: size 2.73TiB used 240.97GiB after balance

2015-07-06 Thread Omar Sandoval
On 07/06/2015 01:01 PM, Donald Pearson wrote:
> Based on my experience Hugo's advice is critical, get the bad drive
> out of the pool when in raid56 and do not try to replace or delete it
> while it's still attached and recognized.
> 
> If you add a new device, mount degraded and rebalance.  If you don't,
> mount degraded then device delete missing.
> 

Watch out, replacing a missing device in RAID 5/6 currently doesn't work
and will cause a kernel BUG(). See my patch series here:
http://www.spinics.net/lists/linux-btrfs/msg44874.html

-- 
Omar


Re: size 2.73TiB used 240.97GiB after balance

2015-07-06 Thread Hendrik Friedel

Hello,

oh dear, I fear I am in trouble:
recovery-mounted, I tried to save some data, but the system hung.
So I re-booted and sdc is now physically disconnected.

Label: none  uuid: b4a6cce6-dc9c-4a13-80a4-ed6bc5b40bb8
Total devices 3 FS bytes used 4.67TiB
devid1 size 2.73TiB used 2.67TiB path /dev/sdc
devid2 size 2.73TiB used 2.67TiB path /dev/sdb
*** Some devices missing

I try to mount the rest again:
mount -o recovery,ro /dev/sdb /mnt/__Complete_Disk
mount: wrong fs type, bad option, bad superblock on /dev/sdb,
   missing codepage or helper program, or other error
   In some cases useful info is found in syslog - try
   dmesg | tail  or so

root@homeserver:~# dmesg | tail 


[  447.059275] BTRFS info (device sdc): enabling auto recovery
[  447.059280] BTRFS info (device sdc): disk space caching is enabled
[  447.086844] BTRFS: failed to read chunk tree on sdc
[  447.110588] BTRFS: open_ctree failed
[  474.496778] BTRFS info (device sdc): enabling auto recovery
[  474.496781] BTRFS info (device sdc): disk space caching is enabled
[  474.519005] BTRFS: failed to read chunk tree on sdc
[  474.540627] BTRFS: open_ctree failed


mount -o degraded,ro /dev/sdb /mnt/__Complete_Disk
Does work now though.

So, how can I remove the reference to the failed disk and check the data 
for consistency (scrub I suppose, but is it safe?)?


Regards,
Hendrik



On 06.07.2015 22:52, Omar Sandoval wrote:

On 07/06/2015 01:01 PM, Donald Pearson wrote:

Based on my experience Hugo's advice is critical, get the bad drive
out of the pool when in raid56 and do not try to replace or delete it
while it's still attached and recognized.

If you add a new device, mount degraded and rebalance.  If you don't,
mount degraded then device delete missing.



Watch out, replacing a missing device in RAID 5/6 currently doesn't work
and will cause a kernel BUG(). See my patch series here:
http://www.spinics.net/lists/linux-btrfs/msg44874.html




--
Hendrik Friedel
Auf dem Brink 12
28844 Weyhe
Tel. 04203 8394854
Mobil 0178 1874363




btrfs check --repair crash, and btrfs-cleaner crash

2015-07-06 Thread Marc MERLIN

myth:~# btrfs check --repair /dev/mapper/crypt_sdd1 
enabling repair mode
Checking filesystem on /dev/mapper/crypt_sdd1
UUID: 024ba4d0-dacb-438d-9f1b-eeb34083fe49
checking extents
cmds-check.c:4486: add_data_backref: Assertion `back->bytes != max_size` failed.
btrfs[0x8066a73]
btrfs[0x8066aa4]
btrfs[0x8067991]
btrfs[0x806b4ab]
btrfs[0x806b9a3]
btrfs[0x806c5b2]
btrfs(cmd_check+0x1088)[0x806eddf]
btrfs(main+0x153)[0x80557c6]
/lib/i386-linux-gnu/libc.so.6(__libc_start_main+0xf3)[0xb75064d3]
btrfs[0x80557ec]

myth:~# btrfs --version
btrfs-progs v4.0

Is anyone interested in getting data off this filesystem or having me
try newer code/a patch?

filesystem is 10TB-ish, so sending an image isn't going to be easy though.

I can mount with -o ro without it crashing, but if I drop ro, it then
tries to do something and crashes, and unfortunately the error doesn't
make it to syslog

Screenshot: http://marc.merlins.org/tmp/btrfs_crash.jpg

Marc
-- 
"A mouse is a device used to point at the xterm you want to type in" - A.S.R.
Microsoft is to operating systems 
   what McDonalds is to gourmet cooking
Home page: http://marc.merlins.org/ | PGP 1024R/763BE901


running duperemove but no free space gain

2015-07-06 Thread Mordechay Kaganer
B.H.

Hello.

I have a btrfs volume which is used as a backup using rsync from the
main servers. It contains many duplicate files across different
subvolumes and i have some read only snapshots of each subvolume,
which are created every time after the backup completes.

I was trying to gain some free space using duperemove (compiled from
git master of this repo: https://github.com/markfasheh/duperemove).

Executed like this:

duperemove -rdAh  

Both directories point to the most recent read only snapshots of the
corresponding subvolumes, but not to the subvolumes themselves, so i
had to add -r option. AFAIK, they should point to exactly the same
data because nothing was changed since the snapshots were taken.

It runs successfully for several hours and prints out many files which
are indeed duplicate like this:

Showing 4 identical extents with id 5164bb47
Start   Length  Filename
0.0 4.8M""
0.0 4.8M""
0.0 4.8M""
0.0 4.8M""
skip...
[0x78dee80] Try to dedupe extents with id 5164bb47
[0x78dee80] Dedupe 3 extents (id: 5164bb47) with target: (0.0, 4.8M), ""

But the actual free space reported by "df" or by "btrfs fi df" doesn't
seem to change. Used space and metadata space even increases slightly.

I thought that doing deduplication on a file in one snapshot would
affect all snapshots/subvolumes that contain this (exact version of
the) file because they all actually should point to the same data
extents. Am I wrong?

Versions:

duperemove v0.11-dev

# uname -a
Linux yemot-bu 4.1.0-040100-generic #201507030940 SMP Fri Jul 3
09:41:47 UTC 2015 x86_64 x86_64 x86_64 GNU/Linux

# btrfs version
btrfs-progs v4.1

Thanks!

-- 
משיח NOW!
Moshiach is coming very soon, prepare yourself!
יחי אדוננו מורינו ורבינו מלך המשיח לעולם ועד!


Re: size 2.73TiB used 240.97GiB after balance

2015-07-06 Thread Donald Pearson
If you can mount it RO, first thing to do is back up any data that you
care about.

According to the bug that Omar posted you should not try a device
replace and you should not try a scrub with a missing device.

You may be able to just do a device delete missing, then separately do
a device add of a new drive, or rebalance back in to raid1.

On Mon, Jul 6, 2015 at 4:12 PM, Hendrik Friedel  wrote:
> Hello,
>
> oh dear, I fear I am in trouble:
> recovery-mounted, I tried to save some data, but the system hung.
> So I re-booted and sdc is now physically disconnected.
>
> Label: none  uuid: b4a6cce6-dc9c-4a13-80a4-ed6bc5b40bb8
> Total devices 3 FS bytes used 4.67TiB
> devid1 size 2.73TiB used 2.67TiB path /dev/sdc
> devid2 size 2.73TiB used 2.67TiB path /dev/sdb
> *** Some devices missing
>
> I try to mount the rest again:
> mount -o recovery,ro /dev/sdb /mnt/__Complete_Disk
> mount: wrong fs type, bad option, bad superblock on /dev/sdb,
>missing codepage or helper program, or other error
>In some cases useful info is found in syslog - try
>dmesg | tail  or so
>
> root@homeserver:~# dmesg | tail
> [  447.059275] BTRFS info (device sdc): enabling auto recovery
> [  447.059280] BTRFS info (device sdc): disk space caching is enabled
> [  447.086844] BTRFS: failed to read chunk tree on sdc
> [  447.110588] BTRFS: open_ctree failed
> [  474.496778] BTRFS info (device sdc): enabling auto recovery
> [  474.496781] BTRFS info (device sdc): disk space caching is enabled
> [  474.519005] BTRFS: failed to read chunk tree on sdc
> [  474.540627] BTRFS: open_ctree failed
>
>
> mount -o degraded,ro /dev/sdb /mnt/__Complete_Disk
> Does work now though.
>
> So, how can I remove the reference to the failed disk and check the data for
> consistency (scrub I suppose, but is it safe?)?
>
> Regards,
> Hendrik
>
>
>
>
> On 06.07.2015 22:52, Omar Sandoval wrote:
>>
>> On 07/06/2015 01:01 PM, Donald Pearson wrote:
>>>
>>> Based on my experience Hugo's advice is critical, get the bad drive
>>> out of the pool when in raid56 and do not try to replace or delete it
>>> while it's still attached and recognized.
>>>
>>> If you add a new device, mount degraded and rebalance.  If you don't,
>>> mount degraded then device delete missing.
>>>
>>
>> Watch out, replacing a missing device in RAID 5/6 currently doesn't work
>> and will cause a kernel BUG(). See my patch series here:
>> http://www.spinics.net/lists/linux-btrfs/msg44874.html
>>
>
>
> --
> Hendrik Friedel
> Auf dem Brink 12
> 28844 Weyhe
> Tel. 04203 8394854
> Mobil 0178 1874363
>
>
>


Re: running duperemove but no free space gain

2015-07-06 Thread Mark Fasheh
On Tue, Jul 07, 2015 at 12:54:01AM +0300, Mordechay Kaganer wrote:
> I have a btrfs volume which is used as a backup using rsync from the
> main servers. It contains many duplicate files across different
> subvolumes and i have some read only snapshots of each subvolume,
> which are created every time after the backup completes.
> 
> I'm was trying to gain some free space using duperemove (compiled from
> git master of this repo: https://github.com/markfasheh/duperemove).
> 
> Executed like this:
> 
> duperemove -rdAh  
> 
> Both directories point to the most recent read only snapshots of the
> corresponding subvolumes, but not to the subvolumes themselves, so i
> had to add -r option. AFAIK, they should point to exactly the same
> data because nothing was changed since the snapshots were taken.
> 
> It runs successfully for several hours and prints out many files which
> are indeed duplicate like this:
> 
> Showing 4 identical extents with id 5164bb47
> Start   Length  Filename
> 0.0 4.8M""
> 0.0 4.8M""
> 0.0 4.8M""
> 0.0 4.8M""
> skip...
> [0x78dee80] Try to dedupe extents with id 5164bb47
> [0x78dee80] Dedupe 3 extents (id: 5164bb47) with target: (0.0, 4.8M), ""
> 
> But the actual free space reported by "df" or by "btrfs fi df" doesn't
> seem to change. Used space and metadata space even increases slightly.

There were some patches for 4.2 which are both on the list and upstream that
fix an issue where the unaligned tail of extents wasn't being deduplicated.
It sounds like you may have hit this. So we can tell, can you run the
'show-shared-extents' program that comes with duperemove (or 'filefrag -e')
against two of the files that should have been deduped together and provide
the output here. If most of the extent is showing deduped but there's a
not-deduped tail extent then that's most likely what you're seeing.
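
For reference, the "shared" flag those tools print comes from the FIEMAP
ioctl; below is a minimal stand-alone sketch (error handling trimmed, extent
count capped arbitrarily at 512) that dumps it per extent, roughly what
show-shared-extents does:

/* Minimal FIEMAP dump: for each extent of the file, print whether the
 * kernel marks it FIEMAP_EXTENT_SHARED.  Sketch only. */
#include <stdio.h>
#include <stdlib.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/fs.h>
#include <linux/fiemap.h>

#define MAX_EXTENTS 512

int main(int argc, char **argv)
{
	struct fiemap *fm;
	int fd, i;

	if (argc != 2) {
		fprintf(stderr, "usage: %s <file>\n", argv[0]);
		return 1;
	}
	fd = open(argv[1], O_RDONLY);
	if (fd < 0) {
		perror("open");
		return 1;
	}

	fm = calloc(1, sizeof(*fm) + MAX_EXTENTS * sizeof(struct fiemap_extent));
	if (!fm)
		return 1;
	fm->fm_start = 0;
	fm->fm_length = ~0ULL;			/* map the whole file */
	fm->fm_flags = FIEMAP_FLAG_SYNC;	/* flush delalloc first */
	fm->fm_extent_count = MAX_EXTENTS;

	if (ioctl(fd, FS_IOC_FIEMAP, fm) < 0) {
		perror("FS_IOC_FIEMAP");
		return 1;
	}

	for (i = 0; i < (int)fm->fm_mapped_extents; i++) {
		struct fiemap_extent *fe = &fm->fm_extents[i];

		printf("[%d] logical %llu len %llu %s\n", i,
		       (unsigned long long)fe->fe_logical,
		       (unsigned long long)fe->fe_length,
		       (fe->fe_flags & FIEMAP_EXTENT_SHARED) ?
				"shared" : "not shared");
	}
	free(fm);
	close(fd);
	return 0;
}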


> I thought that doing deduplication on a file in one snapshot would
> affect all snapshots/subvolumes that contain this (exact version of
> the) file because they all actually should point to the same data
> extents, am i wrong?

Well the case you're describing is one where dedupe wouldn't work - the
extent would already be considered deduplicated since there is only one of
them.

If the data has changed from one snapshot to another, we've created new
extents (for the new data) and it can be deduped against any other extent.
For duperemove to discover it though you have to provide it a path which
will eventually resolve to those extents (that is, duperemove has to find it
in the file scan stage).
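
For what it's worth, the dedupe requests duperemove prints ("Dedupe 3
extents ...") end up as calls to the btrfs extent-same ioctl. A rough
stand-alone sketch of driving that ioctl directly (file names, offsets and
length below are placeholders; the struct layout is the one exposed by
linux/btrfs.h):

/* Dedupe the first <length> bytes of <src> against <dst>, only if the
 * contents match.  Sketch only; a single destination, no chunking. */
#include <stdio.h>
#include <stdlib.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/btrfs.h>

int main(int argc, char **argv)
{
	struct btrfs_ioctl_same_args *args;
	struct btrfs_ioctl_same_extent_info *info;
	int src, dst;

	if (argc != 4) {
		fprintf(stderr, "usage: %s <src> <dst> <length>\n", argv[0]);
		return 1;
	}
	src = open(argv[1], O_RDONLY);
	dst = open(argv[2], O_RDWR);
	if (src < 0 || dst < 0) {
		perror("open");
		return 1;
	}

	args = calloc(1, sizeof(*args) + sizeof(*info));
	if (!args)
		return 1;
	args->logical_offset = 0;		/* range start in the source file */
	args->length = strtoull(argv[3], NULL, 0);
	args->dest_count = 1;
	info = &args->info[0];
	info->fd = dst;
	info->logical_offset = 0;		/* range start in the destination */

	if (ioctl(src, BTRFS_IOC_FILE_EXTENT_SAME, args) < 0) {
		perror("BTRFS_IOC_FILE_EXTENT_SAME");
		return 1;
	}
	printf("status %d, %llu bytes deduped\n", info->status,
	       (unsigned long long)info->bytes_deduped);
	free(args);
	close(src);
	close(dst);
	return 0;
}
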
--Mark

--
Mark Fasheh


Re: size 2.73TiB used 240.97GiB after balance

2015-07-06 Thread Donald Pearson
Anything in dmesg?

On Mon, Jul 6, 2015 at 5:07 PM, hend...@friedels.name wrote:
> Hello,
>
> It seems that mounting works, but the system locks up completely soon
> after I start backing up.
>
>
> Greetings,
>
> Hendrik
>
>
> -- Original message --
>
> From: Donald Pearson
>
> Date: Mon, 6 July 2015 23:49
>
> To: Hendrik Friedel;
>
> Cc: Omar Sandoval; Hugo Mills; Btrfs BTRFS;
>
> Subject: Re: size 2.73TiB used 240.97GiB after balance


Re: running duperemove but no free space gain

2015-07-06 Thread Mordechay Kaganer
B.H.

On Tue, Jul 7, 2015 at 1:34 AM, Mark Fasheh  wrote:
>>
>> It runs successfully for several hours and prints out many files which
>> are indeed duplicate like this:
>>
>> Showing 4 identical extents with id 5164bb47
>> Start   Length  Filename
>> 0.0 4.8M""
>> 0.0 4.8M""
>> 0.0 4.8M""
>> 0.0 4.8M""
>> skip...
>> [0x78dee80] Try to dedupe extents with id 5164bb47
>> [0x78dee80] Dedupe 3 extents (id: 5164bb47) with target: (0.0, 4.8M), ""
>>
>> But the actual free space reported by "df" or by "btrfs fi df" doesn't
>> seem to change. Used space and metadata space even increases slightly.
>
> There were some patches for 4.2 which are both on the list and upstream that
> fix an issue where the unligned tail of extents wasn't being deduplicated.
> It sounds like you may have hit this. So we can tell, can you run the
> 'show-shared-extents' program that comes with duperemove (or 'filefrag -e')
> against two of the files that should have been deduped together and provide
> the output here. If most of the extent is showing deduped but there's a
> not-deduped tail extent then that's most likely what you're seeing.
>

# show-shared-extents  
(fiemap) [0] fe_logical: 0, fe_length: 131072, fe_physical:
350771204096, fe_flags: 0x2008 (encoded shared )
(fiemap) [1] fe_logical: 131072, fe_length: 131072, fe_physical: 350771318784, fe_flags: 0x2008 (encoded shared )
(fiemap) [2] fe_logical: 262144, fe_length: 131072, fe_physical: 350771425280, fe_flags: 0x2008 (encoded shared )
(fiemap) [3] fe_logical: 393216, fe_length: 131072, fe_physical: 350771548160, fe_flags: 0x2008 (encoded shared )
(fiemap) [4] fe_logical: 524288, fe_length: 131072, fe_physical: 350771666944, fe_flags: 0x2008 (encoded shared )
(fiemap) [5] fe_logical: 655360, fe_length: 131072, fe_physical: 350771781632, fe_flags: 0x2008 (encoded shared )
(fiemap) [6] fe_logical: 786432, fe_length: 131072, fe_physical: 350771900416, fe_flags: 0x2008 (encoded shared )
(fiemap) [7] fe_logical: 917504, fe_length: 131072, fe_physical: 350772019200, fe_flags: 0x2008 (encoded shared )
(fiemap) [8] fe_logical: 1048576, fe_length: 131072, fe_physical: 350772137984, fe_flags: 0x2008 (encoded shared )
(fiemap) [9] fe_logical: 1179648, fe_length: 131072, fe_physical: 350772256768, fe_flags: 0x2008 (encoded shared )
(fiemap) [10] fe_logical: 1310720, fe_length: 131072, fe_physical: 350772375552, fe_flags: 0x2008 (encoded shared )
(fiemap) [11] fe_logical: 1441792, fe_length: 131072, fe_physical: 350772494336, fe_flags: 0x2008 (encoded shared )
(fiemap) [12] fe_logical: 1572864, fe_length: 131072, fe_physical: 350772617216, fe_flags: 0x2008 (encoded shared )
(fiemap) [13] fe_logical: 1703936, fe_length: 131072, fe_physical: 350772740096, fe_flags: 0x2008 (encoded shared )
(fiemap) [14] fe_logical: 1835008, fe_length: 131072, fe_physical: 350772854784, fe_flags: 0x2008 (encoded shared )
(fiemap) [15] fe_logical: 1966080, fe_length: 131072, fe_physical: 350772977664, fe_flags: 0x2008 (encoded shared )
(fiemap) [16] fe_logical: 2097152, fe_length: 131072, fe_physical: 350773100544, fe_flags: 0x2008 (encoded shared )
(fiemap) [17] fe_logical: 2228224, fe_length: 131072, fe_physical: 350773223424, fe_flags: 0x2008 (encoded shared )
(fiemap) [18] fe_logical: 2359296, fe_length: 131072, fe_physical: 350773342208, fe_flags: 0x2008 (encoded shared )
(fiemap) [19] fe_logical: 2490368, fe_length: 131072, fe_physical: 350773460992, fe_flags: 0x2008 (encoded shared )
(fiemap) [20] fe_logical: 2621440, fe_length: 131072, fe_physical: 350773579776, fe_flags: 0x2008 (encoded shared )
(fiemap) [21] fe_logical: 2752512, fe_length: 131072, fe_physical: 350773698560, fe_flags: 0x2008 (encoded shared )
(fiemap) [22] fe_logical: 2883584, fe_length: 131072, fe_physical: 350773821440, fe_flags: 0x2008 (encoded shared )
(fiemap) [23] fe_logical: 3014656, fe_length: 131072, fe_physical: 350773944320, fe_flags: 0x2008 (encoded shared )
(fiemap) [24] fe_logical: 3145728, fe_length: 131072, fe_physical: 350774067200, fe_flags: 0x2008 (encoded shared )
(fiemap) [25] fe_logical: 3276800, fe_length: 131072, fe_physical: 350774181888, fe_flags: 0x2008 (encoded shared )
(fiemap) [26] fe_logical: 3407872, fe_length: 131072, fe_physical: 350774300672, fe_flags: 0x2008 (encoded shared )
(fiemap) [27] fe_logical: 3538944, fe_length: 131072, fe_physical: 350774423552, fe_flags: 0x2008 (encoded shared )
(fiemap) [28] fe_logical: 3670016, fe_length: 131072, fe_physical: 350774546432, fe_flags: 0x2008 (encoded shared )
(fiemap) [29] fe_logical: 3801088, fe_length: 131072, fe_physical: 350774669312, fe_flags: 0x2008 (encoded shared )
(fiemap) [30] fe_logical: 3932160, fe_length: 131072, fe_physical: 350774792192, fe_flags: 0x2008 (encoded shared )
(fiemap) [31] fe_logical: 4063232, fe_length: 131072, fe_physical: 350774915072, fe_flags: 0x2008 (encoded shared )
(fiemap) [32] fe_logical: 4194304, fe_length: 131072, fe_physical: 350775037952, fe_
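
The fe_flags value 0x2008 in the listing above decodes, per <linux/fiemap.h>, as
FIEMAP_EXTENT_SHARED (0x2000) plus FIEMAP_EXTENT_ENCODED (0x0008): the extent's
data is referenced more than once (reflink, dedupe or snapshot) and is stored
encoded (e.g. compressed) on disk. A minimal sketch, assuming only a Linux
system with the FIEMAP ioctl (this is not the duperemove code), that produces
output of this shape and decodes those two bits:

/* fiemap_dump.c - minimal FIEMAP example, not the duperemove code */
#include <stdio.h>
#include <stdlib.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/fs.h>
#include <linux/fiemap.h>

int main(int argc, char **argv)
{
	if (argc != 2) {
		fprintf(stderr, "usage: %s <file>\n", argv[0]);
		return 1;
	}

	int fd = open(argv[1], O_RDONLY);
	if (fd < 0) {
		perror("open");
		return 1;
	}

	/* room for 128 extents; a real tool would loop until FIEMAP_EXTENT_LAST */
	unsigned int count = 128;
	struct fiemap *fm = calloc(1, sizeof(*fm) +
				   count * sizeof(struct fiemap_extent));
	if (!fm) {
		perror("calloc");
		return 1;
	}
	fm->fm_start = 0;
	fm->fm_length = FIEMAP_MAX_OFFSET;	/* map the whole file */
	fm->fm_extent_count = count;

	if (ioctl(fd, FS_IOC_FIEMAP, fm) < 0) {
		perror("FS_IOC_FIEMAP");
		return 1;
	}

	for (unsigned int i = 0; i < fm->fm_mapped_extents; i++) {
		struct fiemap_extent *fe = &fm->fm_extents[i];

		printf("[%u] fe_logical: %llu, fe_length: %llu, fe_physical: %llu, fe_flags: 0x%x (%s%s)\n",
		       i,
		       (unsigned long long)fe->fe_logical,
		       (unsigned long long)fe->fe_length,
		       (unsigned long long)fe->fe_physical,
		       fe->fe_flags,
		       /* 0x0008: data is encoded on disk, e.g. compressed */
		       (fe->fe_flags & FIEMAP_EXTENT_ENCODED) ? "encoded " : "",
		       /* 0x2000: extent shared via reflink, dedupe or snapshot */
		       (fe->fe_flags & FIEMAP_EXTENT_SHARED) ? "shared " : "");
	}

	free(fm);
	close(fd);
	return 0;
}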

Re: running duperemove but no free space gain

2015-07-06 Thread Mark Fasheh
On Tue, Jul 07, 2015 at 02:03:06AM +0300, Mordechay Kaganer wrote:
> 
> Checked some more pairs, most extents appear as "shared". In some
> cases there is a "last encoded" (not shared) extent with length 4096.
> 
> Since I use snapshots, may "shared" also mean "shared between snapshots"?

Yes, I forgot about that, but in your case almost everything will be
reported as shared. Btw, I have to leave my office now but will get to the
rest of your e-mail later.

--
Mark Fasheh


Re: strange corruptions found during btrfs check

2015-07-06 Thread Duncan
Christoph Anton Mitterer posted on Mon, 06 Jul 2015 20:40:23 +0200 as
excerpted:

> After removing some of the snapshots that were received, the errors at
> btrfs check went away.
> 
> Is there some list of features in btrfs which are considered stable?
> Cause I though send/receive and the subvolumes would be, but apparently
> this doesn't seem to be the case :-/

[List-regular non-developer but btrfs using admin answer.]

I know of no such list, per se.  There are, however, features that are 
known to be still being very actively worked on, either because they are 
very new to nominal code-completion (raid56 mode), or because they are 
simply complicated problems, possibly having to be redone with a new 
approach as the devs learned more about the issues with the existing 
approach.

This list would include:

raid56 mode (new)

quotas (on I think their second partial rewrite, third approach, now)

send/receive (there are simply very many very complex corner-cases to find 
and deal with)


Subvolumes/snapshots should however be reasonably stable, since their 
basis is pretty close to that of btrfs itself, b-trees and COW, and the 
hooks for managing them (the GUI) have been established for some time.  
The problems involving subvolumes/snapshots aren't so much in that 
subsystem, but in whatever other subsystems are involved as well.  The 
interaction between quotas and subvolumes has been a problem point, for 
instance, and snapshot-aware-defrag continues to be disabled ATM as it 
simply didn't scale due to problems in other areas (quotas being one of 
them).  The interaction between send/receive and subvolumes/snapshots is 
also a problem, but again, not so much on the subvolume/snapshot side, as 
on the send/receive side.


-- 
Duncan - List replies preferred.   No HTML msgs.
"Every nonfree program has a lord, a master --
and if you use the program, he is your master."  Richard Stallman



Re: strange corruptions found during btrfs check

2015-07-06 Thread Christoph Anton Mitterer
On Tue, 2015-07-07 at 00:47 +, Duncan wrote:
> The interaction between send/receive and subvolumes/snapshots
> is also a problem, but again, not so much on the subvolume/snapshot 
> side, as on the send/receive side.

Well I haven't looked into any code, so the following is just
perception:
It seemed that send/receive itself has always worked correctly for me
so far.
I.e. I ran some complete diff -qr over the source and target of an
already incrementally (-p) sent/received snapshot.
That brought no error.

The aforementioned btrfs check errors only occurred after I had removed
older snapshots on the receiving side, i.e. snapshots that btrfs, via
the -p , used as the basis for building the more recent snapshot.

The error messages seem to imply that some of that got lost, or at
least that would be my first wild guess: as if refs in the newer
snapshot on the receiving side now point into the void, because the older
snapshot's objects they were pointing to have been removed (or some
of them lost).



Apart from that, I think it's quite an issue that the core developers
don't keep some well-maintained list of working/experimental
features... that's nearly as problematic as the complete lack of good
and extensive end user (i.e. sysadmin) documentation.
btrfs has been around for quite a while now, and people are starting to
use it... but when they cannot really tell what's stable and what's not
(or which parts of e.g. raid56 still need polishing) and they then
stumble over problems, trust in btrfs is easily lost. :(

Cheers,
Chris.



Re: strange corruptions found during btrfs check

2015-07-06 Thread Duncan
Christoph Anton Mitterer posted on Tue, 07 Jul 2015 03:03:25 +0200 as
excerpted:

> Well I haven't looked into any code, so the following is just
> perception: It seemed that send/receive itself has always worked
> correctly for me so far.
> I.e. I ran some complete diff -qr over the source and target of an
> already incrementally (-p) sent/received snapshot.
> That brought no error.

In general, the send/receive corner-cases are of the type where, if both
the send and the receive complete successfully, the result should be
reliable, but sometimes the operation won't complete successfully.

> The aforementioned btrfs check errors only occurred after I had removed
> older snapshots on the receiving side, i.e. snapshots that btrfs, via
> the -p , used as the basis for building the more recent snapshot.
> 
> The error messages seem to imply that some of that got lost, or at
> least that would be my first wild guess: as if refs in the newer
> snapshot on the receiving side now point into the void, because the older
> snapshot's objects they were pointing to have been removed (or some of
> them lost).

That would imply either a general btrfs bug (see stability discussion 
below) or perhaps a below-filesystem error that happened to be exposed 
by the snapshot deletion.

It does look like a snapshot subsystem error, agreed, and conceivably 
could even be one at some level.  However, the point I sort of made, but 
not well, in the previous reply, was that the snapshot and subvolume 
subsystem is so reliant on the core assumptions that btrfs itself makes 
about copy-on-write, etc, that the two cores really can't be easily 
separated, such that if deletion of a particular snapshot actually 
deletes extents pointed to by another snapshot, it's not a problem with 
the subvolume/snapshot system so much as with btrfs itself.

What /might/ be happening is that an extent usage reference count was 
somehow too low, such that when the snapshot was removed, the reference 
count decremented to zero and btrfs thus thought it safe to remove the 
actual data extents as well.  However, shared-extents are actually a core 
feature of btrfs itself, relied upon not just by snapshot/subvolumes, but 
for instance used with cp --reflink=always when both instances of the 
file are on the same subvolume.  So while such a reference count bug 
could certainly trigger with snapshot deletion, it wouldn't be a snapshot 
subsystem bug, but rather, a bug in core btrfs itself.
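
As a toy illustration of that failure mode (deliberately not the real btrfs
extent/backref code, just the counting logic described above): an extent whose
reference count is one too low gets its data freed as soon as one of its users
is deleted, leaving the surviving snapshot pointing at nothing.

/* refcount_toy.c - toy model of an undercounted shared extent, not btrfs code */
#include <stdio.h>

struct extent {
	unsigned long refs;	/* how many snapshots/reflinks reference it */
	int freed;		/* data blocks returned to the allocator */
};

static void drop_snapshot_ref(struct extent *e)
{
	if (--e->refs == 0)
		e->freed = 1;	/* believed unreferenced, so data is released */
}

int main(void)
{
	/* Two snapshots share this extent, so refs should be 2;
	 * assume a bug left it at 1. */
	struct extent shared = { .refs = 1, .freed = 0 };

	drop_snapshot_ref(&shared);	/* delete the older snapshot */

	/* The newer snapshot still references the extent, but... */
	printf("extent freed while still referenced: %s\n",
	       shared.freed ? "yes - the survivor's refs now dangle" : "no");
	return 0;
}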

The snapshot/subvolume subsystem, then, should be as stable as btrfs 
itself is, the point I made in my original reply, but again, more on that 
below.

> Apart from that, I think it's quite an issue that the core developers
> don't keep some well-maintained list of working/experimental features...
> that's nearly as problematic as the complete lack of good and extensive
> end user (i.e. sysadmin) documentation.
> btrfs has been around for quite a while now, and people are starting to
> use it... but when they cannot really tell what's stable and what's not
> (or which parts of e.g. raid56 still need polishing) and they then
> stumble over problems, trust in btrfs is easily lost. :(

Actually, that's a bit of a sore spot...

Various warnings, in mkfs.btrfs, in the kernel config help text for 
btrfs, etc, about btrfs being experimental, are indeed being removed, tho 
some of us think it may be a bit premature.  And various distros are now 
shipping btrfs as the default for one or more of their default 
partitions.  OpenSuSE is for example shipping with btrfs for the system 
partition, to enable update rollbacks via btrfs snapshotting, among other 
things.

But, btrfs itself remains under very heavy development.

As I've expanded upon in previous posts, due to the dangers of premature 
optimization, perhaps one of the most direct measures of when 
_developers_ consider something stable is whether they've done 
production-level optimizations in areas where pre-production code may 
well change, since if they optimize and then it does change, they lose 
those optimizations and must recode them.  As an example, one reasonably 
well known optimization point in btrfs is the raid1-mode read-mode device 
scheduler.  Btrfs' current scheduler implementation is very simple and 
very easy to test; it simply chooses the first or second copy of the data 
based on even/odd PID.  That works well enough as an initial scheduler, 
being very simple to implement, ensuring both copies of the data get read 
over time, and being easy to test, since selectably loading either side 
or both sides is as easy as even/odd PID for the read test.

But for a single-read-task on an otherwise idle system, it's horrible, 
50% of best-case throughput.  And if your use-case happens to spawn 
multiple work threads such that they're all even-PID or all odd-PID, one 
device is saturated, while the other sits entirely idle!  Simple and 
easily understood case of obviously not yet production optimized!  But 
kernel code already exists for a much 
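
For illustration, a sketch of the even/odd-PID copy selection described above
(a simplified stand-in, not the actual btrfs read scheduler), showing how a
workload whose tasks all share the same PID parity reads from only one of the
two copies while the other device sits idle:

/* raid1_pid_pick.c - simplified stand-in for the scheduler described above */
#include <stdio.h>
#include <sys/types.h>

static int pick_raid1_copy(pid_t pid, int num_copies)
{
	/* even PID -> copy 0, odd PID -> copy 1 (for num_copies == 2) */
	return (int)(pid % num_copies);
}

int main(void)
{
	/* If every worker happens to get an even PID, copy 0 takes all
	 * the reads and copy 1 is never touched. */
	pid_t workers[] = { 1000, 1002, 1004, 1006 };
	unsigned int i;

	for (i = 0; i < sizeof(workers) / sizeof(workers[0]); i++)
		printf("pid %d reads from copy %d\n",
		       (int)workers[i], pick_raid1_copy(workers[i], 2));
	return 0;
}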

[PATCH] Documentation: update btrfs-replace manual to support RAID5/6

2015-07-06 Thread Wang Yanfeng
The man page needs to be updated, since RAID5/6 is now supported
by btrfs-replace.

Signed-off-by: Wang Yanfeng 
---
 Documentation/btrfs-replace.asciidoc | 5 -
 1 file changed, 5 deletions(-)

diff --git a/Documentation/btrfs-replace.asciidoc 
b/Documentation/btrfs-replace.asciidoc
index 774d850..5a14a40 100644
--- a/Documentation/btrfs-replace.asciidoc
+++ b/Documentation/btrfs-replace.asciidoc
@@ -13,11 +13,6 @@ DESCRIPTION
 ---
 *btrfs replace* is used to replace btrfs managed devices with other device.
 
-NOTE: this is not currently supported for RAID5/6 profiles and must use the
-device add/delete workaround.
-It is recommended to see `btrfs-device`(8) for more details about btrfs device
-management.
-
 SUBCOMMAND
 --
 *cancel* ::
-- 
1.9.1



Re: size 2.73TiB used 240.97GiB after balance

2015-07-06 Thread Hendrik Friedel

Hello,

while mounting works with the recovery option, the system locks up soon 
after reading from it.

dmesg shows:
[  684.258246] ata6.00: exception Emask 0x0 SAct 0x0 SErr 0x0 action 0x0
[  684.258249] ata6.00: irq_stat 0x4001
[  684.258252] ata6.00: failed command: DATA SET MANAGEMENT
[  684.258255] ata6.00: cmd 06/01:01:00:00:00/00:00:00:00:00/a0 tag 26 dma 512 out
[  684.258255]          res 51/04:01:01:00:00/00:00:00:00:00/a0 Emask 0x1 (device error)
[  684.258256] ata6.00: status: { DRDY ERR }
[  684.258258] ata6.00: error: { ABRT }
[  684.258266] sd 5:0:0:0: [sdd] tag#26 FAILED Result: hostbyte=DID_OK driverbyte=DRIVER_SENSE
[  684.258268] sd 5:0:0:0: [sdd] tag#26 Sense Key : Illegal Request [current] [descriptor]
[  684.258270] sd 5:0:0:0: [sdd] tag#26 Add. Sense: Unaligned write command
[  684.258272] sd 5:0:0:0: [sdd] tag#26 CDB: Write same(16) 93 08 00 00 00 00 00 01 d3 80 00 00 00 80 00 00



So this drive is failing as well?!

Regards,
Hendrik

On 07.07.2015 00:59, Donald Pearson wrote:

Anything in dmesg?

On Mon, Jul 6, 2015 at 5:07 PM, hend...@friedels.name wrote:

Hello,

It seems that mounting works, but the system locks up completely soon after
I start backing up.

Greetings,

Hendrik


-- Original message --

From: Donald Pearson

Date: Mon, 6 July 2015 23:49

To: Hendrik Friedel;

Cc: Omar Sandoval; Hugo Mills; Btrfs BTRFS;

Subject: Re: size 2.73TiB used 240.97GiB after balance


If you can mount it RO, first thing to do is back up any data that you care
about. According to the bug that Omar posted you should not try a
device replace and you should not try a scrub with a missing device. You may
be able to just do a device delete missing, then separately do a device add
of a new drive, or rebalance back in to raid1.

On Mon, Jul 6, 2015 at 4:12 PM, Hendrik Friedel  wrote:
> Hello,
>
> oh dear, I fear I am in trouble:
> recovery-mounted, I tried to save some data, but the system hung.
> So I re-booted and sdc is now physically disconnected.
>
> Label: none  uuid: b4a6cce6-dc9c-4a13-80a4-ed6bc5b40bb8
> Total devices 3 FS bytes used 4.67TiB
> devid 1 size 2.73TiB used 2.67TiB path /dev/sdc
> devid 2 size 2.73TiB used 2.67TiB path /dev/sdb
> *** Some devices missing
>
> I try to mount the rest again:
> mount -o recovery,ro /dev/sdb /mnt/__Complete_Disk
> mount: wrong fs type, bad option, bad superblock on /dev/sdb,
>        missing codepage or helper program, or other error
>        In some cases useful info is found in syslog - try
>        dmesg | tail  or so
>
> root@homeserver:~# dmesg | tail
> [  447.059275] BTRFS info (device sdc): enabling auto recovery
> [  447.059280] BTRFS info (device sdc): disk space caching is enabled
> [  447.086844] BTRFS: failed to read chunk tree on sdc
> [  447.110588] BTRFS: open_ctree failed
> [  474.496778] BTRFS info (device sdc): enabling auto recovery
> [  474.496781] BTRFS info (device sdc): disk space caching is enabled
> [  474.519005] BTRFS: failed to read chunk tree on sdc
> [  474.540627] BTRFS: open_ctree failed
>
> mount -o degraded,ro /dev/sdb /mnt/__Complete_Disk
> Does work now though.
>
> So, how can I remove the reference to the failed disk and check the data
> for consistency (scrub I suppose, but is it safe?)?
>
> Regards,
> Hendrik
>
> On 06.07.2015 22:52, Omar Sandoval wrote:
>> On 07/06/2015 01:01 PM, Donald Pearson wrote:
>>> Based on my experience Hugo's advice is critical, get the bad drive
>>> out of the pool when in raid56 and do not try to replace or delete it
>>> while it's still attached and recognized.
>>>
>>> If you add a new device, mount degraded and rebalance.  If you don't,
>>> mount degraded then device delete missing.
>>
>> Watch out, replacing a missing device in RAID 5/6 currently doesn't work
>> and will cause a kernel BUG(). See my patch series here:
>> http://www.spinics.net/lists/linux-btrfs/msg44874.html
>
> --
> Hendrik Friedel
> Auf dem Brink 12
> 28844 Weyhe
> Tel. 04203 8394854
> Mobil 0178 1874363
>
> ---
> This e-mail was checked for viruses by Avast antivirus software.
> https://www.avast.com/antivirus



--
Hendrik Friedel
Auf dem Brink 12
28844 Weyhe
Tel. 04203 8394854
Mobil 0178 1874363

---
This e-mail was checked for viruses by Avast antivirus software.
https://www.avast.com/antivirus
