[PATCH v2 2/3] btrfs: split parse_early_options() in two
Extract the part related to the subvol options from parse_early_options() and move it to a new parse function (parse_subvol_options()). This is because mount_root() doesn't need to handle subvol options. Signed-off-by: Tomohiro Misono --- fs/btrfs/super.c | 75 +++- 1 file changed, 58 insertions(+), 17 deletions(-) diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 3c32677..9498743 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -448,7 +448,8 @@ int btrfs_parse_options(struct btrfs_fs_info *info, char *options, case Opt_subvolrootid: case Opt_device: /* -* These are parsed by btrfs_parse_early_options +* These are parsed by btrfs_parse_subvol_options +* and btrfs_parse_early_options * and can be happily ignored here. */ break; @@ -855,11 +856,63 @@ int btrfs_parse_options(struct btrfs_fs_info *info, char *options, * only when we need to allocate a new super block. */ static int btrfs_parse_early_options(const char *options, fmode_t flags, - void *holder, char **subvol_name, u64 *subvol_objectid, - struct btrfs_fs_devices **fs_devices) + void *holder, struct btrfs_fs_devices **fs_devices) { substring_t args[MAX_OPT_ARGS]; char *device_name, *opts, *orig, *p; + int error = 0; + + if (!options) + return 0; + + /* +* strsep changes the string, duplicate it because btrfs_parse_options +* gets called later +*/ + opts = kstrdup(options, GFP_KERNEL); + if (!opts) + return -ENOMEM; + orig = opts; + + while ((p = strsep(&opts, ",")) != NULL) { + int token; + if (!*p) + continue; + + token = match_token(p, tokens, args); + switch (token) { + case Opt_device: + device_name = match_strdup(&args[0]); + if (!device_name) { + error = -ENOMEM; + goto out; + } + error = btrfs_scan_one_device(device_name, + flags, holder, fs_devices); + kfree(device_name); + if (error) + goto out; + break; + default: + break; + } + } + +out: + kfree(orig); + return error; +} + +/* + * Parse mount options that are related to subvolume id + * + * The parsed value is later passed to mount_subvol() + */ +static 
int btrfs_parse_subvol_options(const char *options, fmode_t flags, + void *holder, char **subvol_name, u64 *subvol_objectid) +{ + substring_t args[MAX_OPT_ARGS]; + char *opts, *orig, *p; char *num = NULL; int error = 0; @@ -867,8 +920,8 @@ static int btrfs_parse_early_options(const char *options, fmode_t flags, return 0; /* -* strsep changes the string, duplicate it because parse_options -* gets called twice +* strsep changes the string, duplicate it because +* btrfs_parse_early_options gets called later */ opts = kstrdup(options, GFP_KERNEL); if (!opts) @@ -907,18 +960,6 @@ static int btrfs_parse_early_options(const char *options, fmode_t flags, case Opt_subvolrootid: pr_warn("BTRFS: 'subvolrootid' mount option is deprecated and has no effect\n"); break; - case Opt_device: - device_name = match_strdup(&args[0]); - if (!device_name) { - error = -ENOMEM; - goto out; - } - error = btrfs_scan_one_device(device_name, - flags, holder, fs_devices); - kfree(device_name); - if (error) - goto out; - break; default: break; } -- 2.9.5 -- To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH v2 1/3] btrfs: change btrfs_mount() to mount_root()
Remove the subvol-related part from btrfs_mount() and change its name to mount_root(). Also, a file_system_type that uses mount_root() is defined for use by the third patch. The new btrfs_mount() will be introduced in the third patch. Signed-off-by: Tomohiro Misono --- fs/btrfs/super.c | 24 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 12540b6..3c32677 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -66,6 +66,7 @@ static const struct super_operations btrfs_super_ops; static struct file_system_type btrfs_fs_type; +static struct file_system_type btrfs_root_fs_type; static int btrfs_remount(struct super_block *sb, int *flags, char *data); @@ -1517,10 +1518,10 @@ static int setup_security_options(struct btrfs_fs_info *fs_info, /* * Find a superblock for the given device / mount point. * - * Note: This is based on get_sb_bdev from fs/super.c with a few additions + * Note: This is based on mount_bdev from fs/super.c with a few additions * for multiple device setup. Make sure to keep it in sync. */ -static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags, +static struct dentry *mount_root(struct file_system_type *fs_type, int flags, const char *device_name, void *data) { struct block_device *bdev = NULL; @@ -1529,27 +1530,17 @@ static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags, struct btrfs_fs_info *fs_info = NULL; struct security_mnt_opts new_sec_opts; fmode_t mode = FMODE_READ; - char *subvol_name = NULL; - u64 subvol_objectid = 0; int error = 0; if (!(flags & MS_RDONLY)) mode |= FMODE_WRITE; error = btrfs_parse_early_options(data, mode, fs_type, - &subvol_name, &subvol_objectid, &fs_devices); if (error) { - kfree(subvol_name); return ERR_PTR(error); } - if (subvol_name || subvol_objectid != BTRFS_FS_TREE_OBJECTID) { - /* mount_subvol() will free subvol_name. 
*/ - return mount_subvol(subvol_name, subvol_objectid, flags, - device_name, data); - } - security_init_mnt_opts(&new_sec_opts); if (data) { error = parse_security_options(data, &new_sec_opts); @@ -2133,6 +2124,15 @@ static struct file_system_type btrfs_fs_type = { .kill_sb= btrfs_kill_super, .fs_flags = FS_REQUIRES_DEV | FS_BINARY_MOUNTDATA, }; + +static struct file_system_type btrfs_root_fs_type = { + .owner = THIS_MODULE, + .name = "btrfs", + .mount = mount_root, + .kill_sb= btrfs_kill_super, + .fs_flags = FS_REQUIRES_DEV | FS_BINARY_MOUNTDATA, +}; + MODULE_ALIAS_FS("btrfs"); static int btrfs_control_open(struct inode *inode, struct file *file) -- 2.9.5 -- To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH v2 0/3] btrfs: cleanup mount path
Summary: Cleanup mount path by avoiding calling btrfs_mount() twice. No functional change. Changes in v2: split the patch into three parts. Long Explanation: btrfs uses mount_subtree() to mount a subvolume directly. This function needs a vfsmount* of device's root (/), which is a return value of vfs_kern_mount() (therefore root has to be mounted internally anyway). Current approach of getting root's vfsmount* in mount time is a bit tricky: 1. mount systemcall calls vfs_kern_mount() on the way 2. btrfs_mount() is called 3. btrfs_parse_early_options() parses "subvolid=" mount option and sets the value to subvol_objectid. Otherwise, subvol_objectid has the initial value of 0 4. check whether subvol_objectid is 5 or not. This time id is not 5, and btrfs_mount() returns by calling mount_subvol() 5. In mount_subvol(), original mount options are modified to contain "subvolid=0" in setup_root_args(). Then, vfs_kern_mount() is called with these new options to get root's vfsmount* 6. btrfs_mount() is called again 7. btrfs_parse_early_options() parses "subvolid=0" and sets 5 (instead of 0) to subvol_objectid 8. check whether subvol_objectid is 5 or not. This time id is 5 and mount_subvol() is not called. btrfs_mount() finishes mounting a root 9. (in mount_subvol()) using the return value of vfs_kern_mount(), it calls mount_subtree() 10. return subvolume's dentry As illustrated above, calling btrfs_mount() twice complicates the problem. Callback function of mount time (btrfs_mount()) is specified in struct file_system_type which is passed to vfs_kern_mount(). Therefore, we can avoid this by using another file_system_type for arguments of our vfs_kern_mount() call. There is no need to modify mount options. In this approach: 1. btrfs_mount() is called 2. parse "subvolid=" option and set the value to subvol_objectid 3. mount device's root by calling vfs_kern_mount() with different file_system_type specified. Then, different callback function is called (mount_root()). 
Most of this new function is the same as the original btrfs_mount() 4. return by calling mount_subtree() I think this approach is the same as nfsv4, which is the only other filesystem using mount_subtree() currently, and easy to understand. Most of the change is done by just reorganizing the original code of btrfs_mount()/mount_subvol() into btrfs_mount()/mount_subvol()/mount_root() btrfs_parse_early_options() is split into two parts so that the "device=" option is not handled twice (though handling it twice causes no harm). setup_root_args() is deleted as it is not needed anymore. Tomohiro Misono (3): change btrfs_mount() to mount_root() split parse_early_options() in two introduce new btrfs_mount() fs/btrfs/super.c | 231 ++- 1 file changed, 128 insertions(+), 103 deletions(-) -- 2.9.5 -- To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: using fio to test btrfs compression
On Wed, Sep 20, 2017 at 5:26 PM, Timofey Titovetswrote: > 2017-09-20 14:10 GMT+03:00 shally verma : >> Interesting part is I dont see "encoded" under flags. I couldn't >> understand if flags are retrieved from btrfs file metadata info. As >> you are running on 4.14 and I am on 4.9 >> >> So, am still under doubt, even with dd if files are getting compressed. >> >> What is the filesize shown if you run >> btrfs fi du /mnt/test0.0 .. is it less or actual size? >> >> Is there any command that i can run to confirm file has been compressed? >> >> So far, I had my prints enabled in kernel/fs/btrfs/compression.c and >> check in dmesg that code jumped to compress_page() func. >> >> Thanks >> Shally >> > > Okay, lets play different. > encoded work for last several years for kernel releases, so you must see that. > > Reproduction script: > #!/bin/bash -e > > FILE_NAME=$RANDOM$RANDOM > TMP_DIR=$(mktemp -d) > IMAGE_FILE="$HOME/$FILE_NAME" > > truncate -s 4G $IMAGE_FILE > mkfs.btrfs -m single -L COMPRESS_TEST $IMAGE_FILE > mount -o compress-force $IMAGE_FILE $TMP_DIR > dd if=/dev/zero bs=128K count=2 of=$TMP_DIR/zero > sync > filefrag -v $TMP_DIR/zero > umount $TMP_DIR > rm -v $IMAGE_FILE > > Example output: > ~ sudo ./btrfs_compress_test.sh > btrfs-progs v4.13 > See http://btrfs.wiki.kernel.org for more information. > > Label: COMPRESS_TEST > UUID: abfedc39-dd94-4105-87d6-49eedb13467f > Node size: 16384 > Sector size:4096 > Filesystem size:4.00GiB > Block group profiles: > Data: single8.00MiB > Metadata: single8.00MiB > System: single4.00MiB > SSD detected: no > Incompat features: extref, skinny-metadata > Number of devices: 1 > Devices: > IDSIZE PATH >1 4.00GiB /root/322906281 > > 2+0 records in > 2+0 records out > 262144 bytes (262 kB, 256 KiB) copied, 0.000197746 s, 1.3 GB/s > Filesystem type is: 9123683e > File size of /tmp/tmp.bDyt3EkEG5/zero is 262144 (64 blocks of 4096 bytes) > ext: logical_offset:physical_offset: length: expected: flags: > 0:0.. 31: 3072.. 
3103: 32: encoded > 1: 32.. 63: 3073.. 3104: 32: 3104: > last,encoded,eof > /tmp/tmp.bDyt3EkEG5/zero: 2 extents found > removed '/root/322906281' > > Good luck. Here's my output - Everything is same except: 1. nodesize and sector size = 64K 2. extent length = 2 3. I don't see "encoded" in filefrag here. btrfs-progs v4.13 See http://btrfs.wiki.kernel.org for more information. Label: COMPRESS_TEST UUID: fad6907e-d4eb-4dbb-9014-3918a822c9ce Node size: 65536 Sector size:65536 Filesystem size:4.00GiB Block group profiles: Data: single8.00MiB Metadata: single8.00MiB System: single4.00MiB SSD detected: no Incompat features: extref, skinny-metadata Number of devices: 1 Devices: IDSIZE PATH 1 4.00GiB /root/2808626087 2+0 records in 2+0 records out 262144 bytes (262 kB) copied, 0.00028777 s, 911 MB/s Filesystem type is: 9123683e File size of /tmp/tmp.346ESCdOIi/zero is 262144 (4 blocks of 65536 bytes) ext: logical_offset:physical_offset: length: expected: flags: 0:0.. 1:192.. 193: 2: 1:2.. 3:193.. 194: 2:194: eof /tmp/tmp.346ESCdOIi/zero: 2 extents found removed '/root/2808626087' And this is my dmesg [170127.417119] BTRFS: device label COMPRESS_TEST devid 1 transid 5 /dev/loop0 [170127.417493] BTRFS info (device loop0): force zlib compression [170127.417496] BTRFS info (device loop0): disk space caching is enabled [170127.417499] BTRFS info (device loop0): has skinny extents [170127.425858] BTRFS info (device loop0): creating UUID tree This is fio --version fio-3.0 What do we doubt here? Thanks Shally > -- > Have a nice day, > Timofey. -- To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] btrfs: cleanup mount path
On 09/19/2017 10:05 AM, Misono, Tomohiro wrote: Summary: Cleanup mount path by avoiding calling btrfs_mount() twice. Right. Needs cleanup. This is for more understandable code and no functional change. However this patch diff isn't straightforward to review. Can you split this into smaller patches with reasonable changes? Thanks, Anand Explanation: btrfs uses mount_subtree() to mount a subvolume directly. This function needs a vfsmount* of device's root (/), which is a return value of vfs_kern_mount() (therefore root has to be mounted internally anyway). Current approach of getting root's vfsmount* in mount time is a bit tricky: 1. mount systemcall calls vfs_kern_mount() on the way 2. btrfs_mount() is called 3. btrfs_parse_early_options() parses "subvolid=" mount option and sets the value to subvol_objectid. Otherwise, subvol_objectid has the initial value of 0 4. check whether subvol_objectid is 5 or not. This time id is not 5, and btrfs_mount() returns by calling mount_subvol() 5. In mount_subvol(), original mount options are modified to contain "subvolid=0" in setup_root_args(). Then, vfs_kern_mount() is called with these new options to get root's vfsmount* 6. btrfs_mount() is called again 7. btrfs_parse_early_options() parses "subvolid=0" and sets 5 (instead of 0) to subvol_objectid 8. check whether subvol_objectid is 5 or not. This time id is 5 and mount_subvol() is not called. btrfs_mount() finishes mounting a root 9. (in mount_subvol()) using the return value of vfs_kern_mount(), it calls mount_subtree() 10. return subvolume's dentry As illustrated above, calling btrfs_mount() twice complicates the problem. Callback function of mount time (btrfs_mount()) is specified in struct file_system_type which is passed to vfs_kern_mount(). Therefore, we can avoid this by using another file_system_type for arguments of our vfs_kern_mount() call. There is no need to modify mount options. In this approach: 1. btrfs_mount() is called 2. 
parse "subvolid=" option and set the value to subvol_objectid 3. mount device's root by calling vfs_kern_mount() with different file_system_type specified. Then, different callback function is called (mount_root()). Most of this new function is the same as the original btrfs_mount() 4. return by calling mount_subtree() I think this approach is the same as nfsv4, which is the only other filesystem using mount_subtree() currently, and easy to understand. Most of the change is done by just reorganizing the original code of btrfs_mount()/mount_subvol() into btrfs_mount()/mount_subvol()/mount_root() btrfs_parse_early_options() is split into two parts to avoid "device=" option will be handled twice (though it cause no harm). setup_root_args() is deleted as not needed anymore. Signed-off-by: Tomohiro Misono--- fs/btrfs/super.c | 226 ++- 1 file changed, 123 insertions(+), 103 deletions(-) diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 12540b6..3a183c0 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -66,6 +66,7 @@ static const struct super_operations btrfs_super_ops; static struct file_system_type btrfs_fs_type; +static struct file_system_type btrfs_root_fs_type; static int btrfs_remount(struct super_block *sb, int *flags, char *data); @@ -447,7 +448,8 @@ int btrfs_parse_options(struct btrfs_fs_info *info, char *options, case Opt_subvolrootid: case Opt_device: /* -* These are parsed by btrfs_parse_early_options +* These are parsed by btrfs_parse_subvol_options +* and btrfs_parse_early_options * and can be happily ignored here. */ break; @@ -854,11 +856,58 @@ int btrfs_parse_options(struct btrfs_fs_info *info, char *options, * only when we need to allocate a new super block. 
*/ static int btrfs_parse_early_options(const char *options, fmode_t flags, - void *holder, char **subvol_name, u64 *subvol_objectid, - struct btrfs_fs_devices **fs_devices) + void *holder, struct btrfs_fs_devices **fs_devices) { substring_t args[MAX_OPT_ARGS]; char *device_name, *opts, *orig, *p; + int error = 0; + + if (!options) + return 0; + + /* +* strsep changes the string, duplicate it because btrfs_parse_options +* gets called later +*/ + opts = kstrdup(options, GFP_KERNEL); + if (!opts) + return -ENOMEM; + orig = opts; + + while ((p = strsep(&opts, ",")) != NULL) { + int token; + if (!*p) + continue; + + token = match_token(p, tokens, args); + switch (token) { + case Opt_device: +
Re: defragmenting best practice?
Am Thu, 21 Sep 2017 22:10:13 +0200 schrieb Kai Krakow: > Am Wed, 20 Sep 2017 07:46:52 -0400 > schrieb "Austin S. Hemmelgarn" : > > > > Fragmentation: Files with a lot of random writes can become > > > heavily fragmented (1+ extents) causing excessive multi-second > > > spikes of CPU load on systems with an SSD or large amount a RAM. > > > On desktops this primarily affects application databases > > > (including Firefox). Workarounds include manually defragmenting > > > your home directory using btrfs fi defragment. Auto-defragment > > > (mount option autodefrag) should solve this problem. > > > > > > Upon reading that I am wondering if fragmentation in the Firefox > > > profile is part of my issue. That's one thing I never tested > > > previously. (BTW, this system has 256 GB of RAM and 20 cores.) > > Almost certainly. Most modern web browsers are brain-dead and > > insist on using SQLite databases (or traditional DB files) for > > everything, including the cache, and the usage for the cache in > > particular kills performance when fragmentation is an issue. > > At least in Chrome, you can turn on simple cache backend, which, I > think, is using many small instead of one huge file. This suit btrfs > much better: > > chrome://flags/#enable-simple-cache-backend > > > And then I suggest also doing this (as your login user): > > $ cd $HOME > $ mv .cache .cache.old > $ mkdir .cache > $ lsattr +C .cache Oops, of course that's chattr, not lsattr > $ rsync -av .cache.old/ .cache/ > $ rm -Rf .cache.old > > This makes caches for most applications nocow. Chrome performance was > completely fixed for me by doing this. > > I'm not sure where Firefox puts its cache, I only use it on very rare > occasions. But I think it's going to .cache/mozilla last time looked > at it. > > You may want to close all apps before converting the cache directory. > > Also, I don't see any downsides in making this nocow. That directory > could easily be also completely volatile. 
If something breaks due to > no longer protected by data csum, just clean it out. -- Regards, Kai Replies to list-only preferred. -- To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: defragmenting best practice?
These are great suggestions. I will test several of them (or all of them) and report back with my results once I have done the testing. Thank you! This is a fantastic mailing list. P.S. I'm inclined to stay with Firefox, but I will definitely test Chromium vs Firefox after making a series of changes based on the suggestions here. I would hate to see the market lose the option of Firefox because everyone goes to Chrome/Chromium. -- To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 2/3] btrfs: add a flags argument to LOGICAL_INO and call it LOGICAL_INO_V2
On Thu, Sep 21, 2017 at 04:16:35PM -0400, Zygo Blaxell wrote: > On Thu, Sep 21, 2017 at 12:59:42PM -0700, Darrick J. Wong wrote: > > On Thu, Sep 21, 2017 at 12:10:15AM -0400, Zygo Blaxell wrote: > > > Now that check_extent_in_eb()'s extent offset filter can be turned off, > > > we need a way to do it from userspace. > > > > > > Add a 'flags' field to the btrfs_logical_ino_args structure to disable > > > extent > > > offset filtering, taking the place of one of the reserved[] fields. > > > > > > Previous versions of LOGICAL_INO neglected to check whether any of the > > > reserved fields have non-zero values. Assigning meaning to those fields > > > now may change the behavior of existing programs that left these fields > > > uninitialized. > > > > > > To avoid any surprises, define a new ioctl LOGICAL_INO_V2 which uses > > > the same argument layout as LOGICAL_INO, but uses one of the reserved > > > fields for flags. The V2 ioctl explicitly checks that unsupported flag > > > bits are zero so that userspace can probe for future feature bits as > > > they are defined. If the other reserved fields are used in the future, > > > one of the remaining flag bits could specify that the other reserved > > > fields are valid, so we don't need to check those for now. > > > > > > Since the memory layouts and behavior of the two ioctls' arguments > > > are almost identical, there is no need for a separate function for > > > logical_to_ino_v2 (contrast with tree_search_v2 vs tree_search). > > > A version parameter and an 'if' statement will suffice. > > > > > > Now that we have a flags field in logical_ino_args, add a flag > > > BTRFS_LOGICAL_INO_ARGS_IGNORE_OFFSET to get the behavior we want, > > > and pass it down the stack to iterate_inodes_from_logical. 
> > > > > > Signed-off-by: Zygo Blaxell> > > --- > > > fs/btrfs/ioctl.c | 21 ++--- > > > include/uapi/linux/btrfs.h | 8 +++- > > > 2 files changed, 25 insertions(+), 4 deletions(-) > > > > > > diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c > > > index b7de32568082..2bc3a9588d1d 100644 > > > --- a/fs/btrfs/ioctl.c > > > +++ b/fs/btrfs/ioctl.c > > > @@ -4536,13 +4536,14 @@ static int build_ino_list(u64 inum, u64 offset, > > > u64 root, void *ctx) > > > } > > > > > > static long btrfs_ioctl_logical_to_ino(struct btrfs_fs_info *fs_info, > > > - void __user *arg) > > > + void __user *arg, int version) > > > { > > > int ret = 0; > > > int size; > > > struct btrfs_ioctl_logical_ino_args *loi; > > > struct btrfs_data_container *inodes = NULL; > > > struct btrfs_path *path = NULL; > > > + bool ignore_offset; > > > > > > if (!capable(CAP_SYS_ADMIN)) > > > return -EPERM; > > > @@ -4551,6 +4552,17 @@ static long btrfs_ioctl_logical_to_ino(struct > > > btrfs_fs_info *fs_info, > > > if (IS_ERR(loi)) > > > return PTR_ERR(loi); > > > > > > + if (version == 1) { > > > + ignore_offset = false; > > > + } else { > > > + /* Only accept flags we have defined so far */ > > > + if (loi->flags & ~(BTRFS_LOGICAL_INO_ARGS_IGNORE_OFFSET)) { > > > + ret = -EINVAL; > > > + goto out_loi; > > > + } > > > + ignore_offset = loi->flags & > > > BTRFS_LOGICAL_INO_ARGS_IGNORE_OFFSET; > > > > Please check loi->reserved[3] for zeroness so that the next person who > > wants to add a field to btrfs_ioctl_logical_ino_args doesn't have to > > create LOGICAL_INO_V3 for the same reason you're creating V2. > > OK now I'm confused, in several distinct ways. > > I wonder if you meant reserved[1] and reserved[2] there, since I'm not > checking them (for reasons stated in the commit log--we can use flags > to indicate whether and what values are present there). 
You can do that, though that means you have to burn flag bits to light up the remaining reserved area, which means you can't in the future decide that a non-zero field value will turn on some new feature. You retain the ability to use flag bits to turn on the new field, if it's the case that zero has a meaning. > But that's not the bigger problem. Maybe you did mean reserved[3], but > there's no "reserved[3]" any more. I shortened the reserved array from > 4 elements to 3, so "reserved[3]" is no longer a valid memory reference. > Also "reserved[0]" no longer refers to the same thing it once did. Oops, sorry, that was a typo, I meant reserved[], as in 'check the whole array via memchr_inv'. --D > > > --D > > > > > + } > > > + > > > path = btrfs_alloc_path(); > > > if (!path) { > > > ret = -ENOMEM; > > > @@ -4566,7 +4578,7 @@ static long btrfs_ioctl_logical_to_ino(struct > > > btrfs_fs_info *fs_info, > > > } > > > > > > ret = iterate_inodes_from_logical(loi->logical, fs_info, path, > > > - build_ino_list, inodes, false); > > > +
Re: [PATCH 2/3] btrfs: add a flags argument to LOGICAL_INO and call it LOGICAL_INO_V2
On Thu, Sep 21, 2017 at 12:59:42PM -0700, Darrick J. Wong wrote: > On Thu, Sep 21, 2017 at 12:10:15AM -0400, Zygo Blaxell wrote: > > Now that check_extent_in_eb()'s extent offset filter can be turned off, > > we need a way to do it from userspace. > > > > Add a 'flags' field to the btrfs_logical_ino_args structure to disable > > extent > > offset filtering, taking the place of one of the reserved[] fields. > > > > Previous versions of LOGICAL_INO neglected to check whether any of the > > reserved fields have non-zero values. Assigning meaning to those fields > > now may change the behavior of existing programs that left these fields > > uninitialized. > > > > To avoid any surprises, define a new ioctl LOGICAL_INO_V2 which uses > > the same argument layout as LOGICAL_INO, but uses one of the reserved > > fields for flags. The V2 ioctl explicitly checks that unsupported flag > > bits are zero so that userspace can probe for future feature bits as > > they are defined. If the other reserved fields are used in the future, > > one of the remaining flag bits could specify that the other reserved > > fields are valid, so we don't need to check those for now. > > > > Since the memory layouts and behavior of the two ioctls' arguments > > are almost identical, there is no need for a separate function for > > logical_to_ino_v2 (contrast with tree_search_v2 vs tree_search). > > A version parameter and an 'if' statement will suffice. > > > > Now that we have a flags field in logical_ino_args, add a flag > > BTRFS_LOGICAL_INO_ARGS_IGNORE_OFFSET to get the behavior we want, > > and pass it down the stack to iterate_inodes_from_logical. 
> > > > Signed-off-by: Zygo Blaxell> > --- > > fs/btrfs/ioctl.c | 21 ++--- > > include/uapi/linux/btrfs.h | 8 +++- > > 2 files changed, 25 insertions(+), 4 deletions(-) > > > > diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c > > index b7de32568082..2bc3a9588d1d 100644 > > --- a/fs/btrfs/ioctl.c > > +++ b/fs/btrfs/ioctl.c > > @@ -4536,13 +4536,14 @@ static int build_ino_list(u64 inum, u64 offset, u64 > > root, void *ctx) > > } > > > > static long btrfs_ioctl_logical_to_ino(struct btrfs_fs_info *fs_info, > > - void __user *arg) > > + void __user *arg, int version) > > { > > int ret = 0; > > int size; > > struct btrfs_ioctl_logical_ino_args *loi; > > struct btrfs_data_container *inodes = NULL; > > struct btrfs_path *path = NULL; > > + bool ignore_offset; > > > > if (!capable(CAP_SYS_ADMIN)) > > return -EPERM; > > @@ -4551,6 +4552,17 @@ static long btrfs_ioctl_logical_to_ino(struct > > btrfs_fs_info *fs_info, > > if (IS_ERR(loi)) > > return PTR_ERR(loi); > > > > + if (version == 1) { > > + ignore_offset = false; > > + } else { > > + /* Only accept flags we have defined so far */ > > + if (loi->flags & ~(BTRFS_LOGICAL_INO_ARGS_IGNORE_OFFSET)) { > > + ret = -EINVAL; > > + goto out_loi; > > + } > > + ignore_offset = loi->flags & > > BTRFS_LOGICAL_INO_ARGS_IGNORE_OFFSET; > > Please check loi->reserved[3] for zeroness so that the next person who > wants to add a field to btrfs_ioctl_logical_ino_args doesn't have to > create LOGICAL_INO_V3 for the same reason you're creating V2. OK now I'm confused, in several distinct ways. I wonder if you meant reserved[1] and reserved[2] there, since I'm not checking them (for reasons stated in the commit log--we can use flags to indicate whether and what values are present there). But that's not the bigger problem. Maybe you did mean reserved[3], but there's no "reserved[3]" any more. I shortened the reserved array from 4 elements to 3, so "reserved[3]" is no longer a valid memory reference. 
Also "reserved[0]" no longer refers to the same thing it once did. > --D > > > + } > > + > > path = btrfs_alloc_path(); > > if (!path) { > > ret = -ENOMEM; > > @@ -4566,7 +4578,7 @@ static long btrfs_ioctl_logical_to_ino(struct > > btrfs_fs_info *fs_info, > > } > > > > ret = iterate_inodes_from_logical(loi->logical, fs_info, path, > > - build_ino_list, inodes, false); > > + build_ino_list, inodes, > > ignore_offset); > > if (ret == -EINVAL) > > ret = -ENOENT; > > if (ret < 0) > > @@ -4580,6 +4592,7 @@ static long btrfs_ioctl_logical_to_ino(struct > > btrfs_fs_info *fs_info, > > out: > > btrfs_free_path(path); > > kvfree(inodes); > > +out_loi: > > kfree(loi); > > > > return ret; > > @@ -5550,7 +5563,9 @@ long btrfs_ioctl(struct file *file, unsigned int > > case BTRFS_IOC_INO_PATHS: > > return btrfs_ioctl_ino_to_path(root, argp); > > case BTRFS_IOC_LOGICAL_INO: > > - return
Re: defragmenting best practice?
Am Wed, 20 Sep 2017 07:46:52 -0400 schrieb "Austin S. Hemmelgarn": > > Fragmentation: Files with a lot of random writes can become > > heavily fragmented (1+ extents) causing excessive multi-second > > spikes of CPU load on systems with an SSD or large amount of RAM. On > > desktops this primarily affects application databases (including > > Firefox). Workarounds include manually defragmenting your home > > directory using btrfs fi defragment. Auto-defragment (mount option > > autodefrag) should solve this problem. > > > > Upon reading that I am wondering if fragmentation in the Firefox > > profile is part of my issue. That's one thing I never tested > > previously. (BTW, this system has 256 GB of RAM and 20 cores.) > Almost certainly. Most modern web browsers are brain-dead and insist > on using SQLite databases (or traditional DB files) for everything, > including the cache, and the usage for the cache in particular kills > performance when fragmentation is an issue. At least in Chrome, you can turn on the simple cache backend, which, I think, uses many small files instead of one huge file. This suits btrfs much better: chrome://flags/#enable-simple-cache-backend And then I suggest also doing this (as your login user): $ cd $HOME $ mv .cache .cache.old $ mkdir .cache $ chattr +C .cache $ rsync -av .cache.old/ .cache/ $ rm -Rf .cache.old This makes caches for most applications nocow. Chrome performance was completely fixed for me by doing this. I'm not sure where Firefox puts its cache, I only use it on very rare occasions. But I think it was going to .cache/mozilla last time I looked at it. You may want to close all apps before converting the cache directory. Also, I don't see any downsides in making this nocow. That directory could easily be also completely volatile. If something breaks due to no longer being protected by data csum, just clean it out. -- Regards, Kai Replies to list-only preferred. 
-- To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 2/3] btrfs: add a flags argument to LOGICAL_INO and call it LOGICAL_INO_V2
On Thu, Sep 21, 2017 at 12:10:15AM -0400, Zygo Blaxell wrote: > Now that check_extent_in_eb()'s extent offset filter can be turned off, > we need a way to do it from userspace. > > Add a 'flags' field to the btrfs_logical_ino_args structure to disable extent > offset filtering, taking the place of one of the reserved[] fields. > > Previous versions of LOGICAL_INO neglected to check whether any of the > reserved fields have non-zero values. Assigning meaning to those fields > now may change the behavior of existing programs that left these fields > uninitialized. > > To avoid any surprises, define a new ioctl LOGICAL_INO_V2 which uses > the same argument layout as LOGICAL_INO, but uses one of the reserved > fields for flags. The V2 ioctl explicitly checks that unsupported flag > bits are zero so that userspace can probe for future feature bits as > they are defined. If the other reserved fields are used in the future, > one of the remaining flag bits could specify that the other reserved > fields are valid, so we don't need to check those for now. > > Since the memory layouts and behavior of the two ioctls' arguments > are almost identical, there is no need for a separate function for > logical_to_ino_v2 (contrast with tree_search_v2 vs tree_search). > A version parameter and an 'if' statement will suffice. > > Now that we have a flags field in logical_ino_args, add a flag > BTRFS_LOGICAL_INO_ARGS_IGNORE_OFFSET to get the behavior we want, > and pass it down the stack to iterate_inodes_from_logical. 
> > Signed-off-by: Zygo Blaxell> --- > fs/btrfs/ioctl.c | 21 ++--- > include/uapi/linux/btrfs.h | 8 +++- > 2 files changed, 25 insertions(+), 4 deletions(-) > > diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c > index b7de32568082..2bc3a9588d1d 100644 > --- a/fs/btrfs/ioctl.c > +++ b/fs/btrfs/ioctl.c > @@ -4536,13 +4536,14 @@ static int build_ino_list(u64 inum, u64 offset, u64 > root, void *ctx) > } > > static long btrfs_ioctl_logical_to_ino(struct btrfs_fs_info *fs_info, > - void __user *arg) > + void __user *arg, int version) > { > int ret = 0; > int size; > struct btrfs_ioctl_logical_ino_args *loi; > struct btrfs_data_container *inodes = NULL; > struct btrfs_path *path = NULL; > + bool ignore_offset; > > if (!capable(CAP_SYS_ADMIN)) > return -EPERM; > @@ -4551,6 +4552,17 @@ static long btrfs_ioctl_logical_to_ino(struct > btrfs_fs_info *fs_info, > if (IS_ERR(loi)) > return PTR_ERR(loi); > > + if (version == 1) { > + ignore_offset = false; > + } else { > + /* Only accept flags we have defined so far */ > + if (loi->flags & ~(BTRFS_LOGICAL_INO_ARGS_IGNORE_OFFSET)) { > + ret = -EINVAL; > + goto out_loi; > + } > + ignore_offset = loi->flags & > BTRFS_LOGICAL_INO_ARGS_IGNORE_OFFSET; Please check loi->reserved[3] for zeroness so that the next person who wants to add a field to btrfs_ioctl_logical_ino_args doesn't have to create LOGICAL_INO_V3 for the same reason you're creating V2. 
--D > + } > + > path = btrfs_alloc_path(); > if (!path) { > ret = -ENOMEM; > @@ -4566,7 +4578,7 @@ static long btrfs_ioctl_logical_to_ino(struct > btrfs_fs_info *fs_info, > } > > ret = iterate_inodes_from_logical(loi->logical, fs_info, path, > - build_ino_list, inodes, false); > + build_ino_list, inodes, > ignore_offset); > if (ret == -EINVAL) > ret = -ENOENT; > if (ret < 0) > @@ -4580,6 +4592,7 @@ static long btrfs_ioctl_logical_to_ino(struct > btrfs_fs_info *fs_info, > out: > btrfs_free_path(path); > kvfree(inodes); > +out_loi: > kfree(loi); > > return ret; > @@ -5550,7 +5563,9 @@ long btrfs_ioctl(struct file *file, unsigned int > case BTRFS_IOC_INO_PATHS: > return btrfs_ioctl_ino_to_path(root, argp); > case BTRFS_IOC_LOGICAL_INO: > - return btrfs_ioctl_logical_to_ino(fs_info, argp); > + return btrfs_ioctl_logical_to_ino(fs_info, argp, 1); > + case BTRFS_IOC_LOGICAL_INO_V2: > + return btrfs_ioctl_logical_to_ino(fs_info, argp, 2); > case BTRFS_IOC_SPACE_INFO: > return btrfs_ioctl_space_info(fs_info, argp); > case BTRFS_IOC_SYNC: { > diff --git a/include/uapi/linux/btrfs.h b/include/uapi/linux/btrfs.h > index 378230c163d5..0b3de597e04f 100644 > --- a/include/uapi/linux/btrfs.h > +++ b/include/uapi/linux/btrfs.h > @@ -608,10 +608,14 @@ struct btrfs_ioctl_ino_path_args { > struct btrfs_ioctl_logical_ino_args { > __u64 logical;/* in */ > __u64 size; /* in
Re: defragmenting best practice?
On September 19, 2017 11:38:13 PM PDT, Dave wrote: >>On Thu 2017-08-31 (09:05), Ulli Horlacher wrote: > >Here's my scenario. Some months ago I built an over-the-top powerful >desktop computer / workstation and I was looking forward to really >fantastic performance improvements over my 6 year old Ubuntu machine. >I installed Arch Linux on BTRFS on the new computer (on an SSD). To my >shock, it was no faster than my old machine. I focused a lot on >Firefox performance because I use Firefox a lot and that was one of >the applications in which I was most looking forward to better >performance. > > > >What would you guys do in this situation? Check out profile sync daemon: https://wiki.archlinux.org/index.php/profile-sync-daemon It keeps the active profile files in a ramfs, periodically syncing them back to disk. It works quite well on my 7 year old netbook. --Sean -- To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: SSD caching an existing btrfs raid1
On 20/09/17 22:45, Kai Krakow wrote: Am Wed, 20 Sep 2017 17:51:15 +0200 schrieb Psalle: On 19/09/17 17:47, Austin S. Hemmelgarn wrote: (...) A better option if you can afford to remove a single device from that array temporarily is to use bcache. Bcache has one specific advantage in this case, multiple backend devices can share the same cache device. This means you don't have to carve out dedicated cache space for each disk on the SSD and leave some unused space so that you can add new devices if needed. The downside is that you can't convert each device in-place, but because you're using BTRFS, you can still convert the volume as a whole in-place. The procedure for doing so looks like this: 1. Format the SSD as a bcache cache. 2. Use `btrfs device delete` to remove a single hard drive from the array. 3. Set up the drive you just removed as a bcache backing device bound to the cache you created in step 1. 4. Add the new bcache device to the array. 5. Repeat from step 2 until the whole array is converted. A similar procedure can actually be used to do almost any underlying storage conversion (for example, switching to whole disk encryption, or adding LVM underneath BTRFS) provided all your data can fit on one less disk than you have. Thanks Austin, that's just great. For some reason I had discarded bcache thinking that it would force me to rebuild from scratch, but this kind of incremental migration is exactly why I hoped was possible. I have plenty of space to replace the devices one by one. I will report back my experience in a few days, I hope. I've done it exactly that way in the past and it worked flawlessly (but it took 24+ hours). But it was easy for me because I was also adding a third disk to the pool, so existing stuff could easily move. I suggest to initialize bcache to writearound mode while converting, so your maybe terabytes of disk don't go through the SSD. 
If you later decide to remove bcache or not sure about future bcache usage, you can wrap any partition into a bcache container - just don't connect it to a cache and it will work like a normal partition. Those are good advices. I've finished now and it seems to have gone without a hitch. Thanks! -- To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: using fio to test btrfs compression
On Mon, Sep 18, 2017 at 01:06:45PM +0530, shally verma wrote: > Hi > > I wanted to test btrfs compression using fio command but somehow > during fio writes, I don't see code taking route of compression blocks > where as If I do a copy to btrfs compression enabled mount point then > I can easily see code falling through compression.c. > > Here's how I do my setup > > 1. mkfs.btrfs /dev/sdb1 > 2. mount -t btrfs -o compress=zlib,compress-force /dev/sdb1 /mnt > 3. cp /mnt > 4. dmesg shows print staments from compression.c and zlib.c confirming > compression routine was invoked during write > 5. now, copy back from btrfs mount point to home directory also shows > decompress call invokation > > Now, try same with fio commands: > > fio command > > fio --directory=/mnt/ --numjobs=1 --direct=0 --buffered=1 > --ioengine=libaio --group_reporting --bs=64k --rw=write --iodepth=128 > --name=test --size=10G --runtime=180 --time_based fio by default uses fallocate (posix_falloc) to pre-allocate space for the later writes, and PREALLOC path overrides compression path. Like others mentioned, after fio and sync, you'll see 'encoded' in filefrag -v your_file. thanks, -liubo > > But it seems to write uncompressed data. > > Any help here? what's missing? > > Thanks > Shally > -- > To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in > the body of a message to majord...@vger.kernel.org > More majordomo info at http://vger.kernel.org/majordomo-info.html -- To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: Help Recovering BTRFS array
Hi Duncan, I'm not sure if this will attach to my original message... Thank you for your reply. For some reason i'm not getting list messages even tho i know i am subscribed. I know all too well about the golden rule of data. It has bitten me a few times. The data on this array is mostly data that i don't really care about. I was able to copy off what i wanted. The main reason i sent it to the list was just to see if i could somehow return the FS to a working state without having to recreate. I'm just surprised that all 3 copies of the super block got corrupted. Probably my lack of understanding but i always assumed that if one copy got corrupted it would be replaced by a good copy therefore leaving all copies in a good state. Is that not the case? If it is then what bad luck that all 3 got messed up at same time. Some information i forgot to include in my original message uname -a Linux thebeach 4.12.13-gentoo-GMAN #1 SMP Sat Sep 16 15:28:26 ADT 2017 x86_64 Intel(R) Core(TM) i5-2320 CPU @ 3.00GHz GenuineIntel GNU/Linux btrfs --version btrfs-progs v4.10.2 Anyways thank you again for your reply. I will leave the FS intact for a few days in case any more details could help the development of BTRFS and maybe avoid this happening or having a recovery option. Marc -- To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: using fio to test btrfs compression
shally verma posted on Wed, 20 Sep 2017 16:40:15 +0530 as excerpted: > Is there any command that i can run to confirm file has been compressed? There is the quite recently posted (and actively updated since then) compsize command. https://github.com/kilobyte/compsize -- Duncan - List replies preferred. No HTML msgs. "Every nonfree program has a lord, a master -- and if you use the program, he is your master." Richard Stallman -- To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: defragmenting best practice?
Dave posted on Wed, 20 Sep 2017 02:38:13 -0400 as excerpted: > Here's my scenario. Some months ago I built an over-the-top powerful > desktop computer / workstation and I was looking forward to really > fantastic performance improvements over my 6 year old Ubuntu machine. I > installed Arch Linux on BTRFS on the new computer (on an SSD). To my > shock, it was no faster than my old machine. I focused a lot on Firefox > performance because I use Firefox a lot and that was one of the > applications in which I was most looking forward to better performance. > > I tried everything I could think of and everything recommended to me in > various forums (except switching to Windows) and the performance > remained very disappointing. > > Then today I read the following: > > Gotchas - btrfs Wiki https://btrfs.wiki.kernel.org/index.php/Gotchas > > Fragmentation: Files with a lot of random writes can become > heavily fragmented (1+ extents) causing excessive multi-second > spikes of CPU load on systems with an SSD or large amount a RAM. On > desktops this primarily affects application databases (including > Firefox). Workarounds include manually defragmenting your home directory > using btrfs fi defragment. Auto-defragment (mount option autodefrag) > should solve this problem. > > Upon reading that I am wondering if fragmentation in the Firefox profile > is part of my issue. That's one thing I never tested previously. (BTW, > this system has 256 GB of RAM and 20 cores.) > > Furthermore, on the same BTRFS Wiki page, it mentions the performance > penalties of many snapshots. I am keeping 30 to 50 snapshots of the > volume that contains the Firefox profile. > > Would these two things be enough to turn top-of-the-line hardware into a > mediocre-preforming desktop system? (The system performs fine on > benchmarks -- it's real life usage, particularly with Firefox where it > is disappointing.) 
> > After reading the info here, I am wondering if I should make a new > subvolume just for my Firefox profile(s) and not use COW and/or not keep > snapshots on it and mount it with the autodefrag option. > > As part of this strategy, I could send snapshots to another disk using > btrfs send-receive. That way I would have the benefits of snapshots > (which are important to me), but by not keeping any snapshots on the > live subvolume I could avoid the performance problems. > > What would you guys do in this situation? [FWIW this is my second try at a reply, my first being way too detailed and going off into the weeds somewhere, so I killed it.] That's an interesting scenario indeed, and perhaps I can help, since my config isn't near as high end as yours, but I run firefox on btrfs on ssds, and have no performance complaints. The difference is very likely due to one or more of the following (FWIW I'd suggest a 4-3-1-2 order, tho only 1 and 2 are really btrfs related): 1) I make sure I consistently mount with autodefrag, from the first mount after the filesystem is created in ordered to first populate it, on. The filesystem never gets fragmented, forcing writes to highly fragmented free space, in the first place. (With the past and current effect of the ssd mount option under discussion to change, it's possible I'll get more fragmentation in the future after ssd doesn't try so hard to find reasonably large free-space chunks to write into, but it has been fine so far.) 2) Subvolumes and snapshots seemed to me more trouble than they were worth, particularly since it's the same filesystem anyway, and if it's damaged, it'll take all the subvolumes and snapshots with it. So I don't use them, preferring instead to use real partitioning and more smaller fully separate filesystems, some of which aren't mounted by default (and root mounted read-only by default), so there's little chance they'll be damaged in a crash or filesystem bug damage scenario. 
And if there /is/ any damage, it's much more limited in scope since all my data eggs aren't in the same basket, so maintenance such as btrfs check and scrub take far less time (and check far less memory) than they would were it one big pool with snapshots. And if recovery fails too, the backups are likewise small filesystems the same size as the working copies, so copying the data back over takes far less time as well (not to mention making the backups takes less time in the first place, so it's easier to regularly update them). 3) Austin mentioned the firefox cache. I honestly wouldn't know on it, since I have firefox configured to use a tmpfs for its cache, so it operates at memory speed and gets cleared along with its memory at every reboot or tmpfs umount. My inet speed is fast enough I don't really need cache anyway, but it's nice to have it, operating at memory speed, within a single boot session... and to have it cleared on reboot. 4) This one was the biggest one for me for awhile. Is firefox running in multi-process mode?
Re: Storage and snapshots as historical yearly
El martes, 19 de septiembre de 2017 21:33:31 (CEST) Andrei Borzenkov escribió: > 19.09.2017 14:49, Senén Vidal Blanco пишет: > > Perfect!! Just what I was looking for. > > Sorry for the delay, because before doing so, I preferred to test to see > > if it actually worked. > > > > I have a doubt. The system works perfectly, but at the time of deleting > > the > > writing disk and merging the data on the read-only disk I fail to > > understand the process. > > > > I have tried to remove the seed bit on disk A and delete the write B as > > you > > mention, and so move the data to A, but tells me that disk B does not > > exist. These are the orders I have made: > > > > md127-> A > > md126-> B > > > > btrfstune -S 0 /dev /md127 > > mount /dev/md127 /mnt (I mount this disk since the md126 gives error) > > btrfs device delete /dev/md126 /mnt > > ERROR: error removing device '/dev/md126': No such file or directory > > > > Another thing I've tried is to remove disk B without removing the seed > > bit, > > but it gives me the error: > > > > ERROR: error removing device '/dev/md126': unable to remove the only > > writeable device. > > > > Any ideas about it? > > Yes, sorry about it. Clearing seed flag on device invalidates > filesystem. What you can do, is to rotate devices. I.e. remove > /dev/md126, set seed flag on md127 and add md126 back. > > I actually tested it and it works for me. > OK thanks Now I see how it works :)) With the commands: mount /dev/md126 /mnt btrfs device remove /dev/md127 /mnt We remove the read-only array (A) from the BTRFS system and in doing so pass all the information from (A) to (B) read-write to mix them. From what I see is not bad since both (A) and (B) are still operational. (A) with last year and (B) with everything current. 
Finally with this other commands: btrfstune -S 1 /dev/md126 mount /dev/md126 /mnt btrfs device add -f /dev/md127 /mnt we activate the seed bit in md126 (B) and add the (A) in read-write mode, where the new files will be archived and (B) as store until the following year and (A) do clean to fill in it new data. I have tried to rotate twice to see if it goes well and smoothly. Just comment that I see two small problems to this: 1. The transfer of data from (A) to (B) when removing the read-only disk takes quite a while and more the more it has stored in the history. It would be nice if the process were reversed, since in (B) there are fewer "data" stored. Also, I could not use it monthly or daily for this reason. 2. My idea was to have a larger A-disk than B where I would save the historical ones, because so in B I could put a smaller disk and something faster. If the decoupling process outside read-write rather than read-only and passed the data to A would be ideal for this case. On the other hand, as an anecdote only, and perhaps for lack of experience or knowledge, I have used the entire linux system in BTRFS (@ and @home) format and a single partition md126 to have the system bootable and running simply by attaching the disk to the computer in degraded mode (swap outside the raid , which I'm not so bad: P). This has made that by rotating disks A and B I have had some problems with grub and fstab at boot, which I had to overcome by making changes to the boot configurations and some more botches. I'm going to see a couple more things and if there's any way I can combine this with snapshots and see if the bulb will light up. If I do not get it I will try with the other filesystems that you have suggested to me. Although honestly, I like BTRFS more than the other alternatives, I already use BTRFS on 5 computers and it goes very well. Greetings. > > Thank you very much for the reply. > > Greetings. 
> > > > El martes, 12 de septiembre de 2017 6:34:15 (CEST) Andrei Borzenkov escribió: > >> 11.09.2017 21:17, Senén Vidal Blanco пишет: > >>> I am trying to implement a system that stores the data in a unit (A) > >>> with > >>> BTRFS format that is untouchable and that future files and folders > >>> created > >>> or modified are stored in another physical unit (B) with BTRFS format. > >>> Each year the new files will be moved to store A and start over. > >>> > >>> The idea is that a duplicate of disk A can be made to keep it in a safe > >>> place and that the files stored there can not be modified until the > >>> mixture of (A) and (B) is made. > >> > >> This can probably be achieved using seed device. Mark original device as > >> seed and all changes will go to another writable device, similar to > >> overlay; then remove seed bit from original device, "btrfs device remove > >> writable" device and it should relocate its content back. Rinse and > >> repeat. -- Senén Vidal Blanco - SGISoft S.L. Tlf.: 986413322 - 660923711 GPG ID 466431A8AF01F99A http://www.sgisoft.com/ -- signature.asc Description: This is a digitally signed message part.
Re: [PATCH] fstests: btrfs/150 regression test for reading compressed data
On Wed, Sep 20, 2017 at 05:52:43PM -0600, Liu Bo wrote: >We had a bug in btrfs compression code which could end up with a >kernel panic. > >This is adding a regression test for the bug and I've also sent a >kernel patch to fix the bug. > >The patch is "Btrfs: fix kernel oops while reading compressed data". > >Signed-off-by: Liu Bo>--- > tests/btrfs/150 | 102 > tests/btrfs/150.out | 3 ++ > tests/btrfs/group | 1 + > 3 files changed, 106 insertions(+) > create mode 100755 tests/btrfs/150 > create mode 100644 tests/btrfs/150.out > >diff --git a/tests/btrfs/150 b/tests/btrfs/150 >new file mode 100755 >index 000..834be51 >--- /dev/null >+++ b/tests/btrfs/150 >@@ -0,0 +1,102 @@ >+#! /bin/bash >+# FS QA Test btrfs/150 >+# >+# This is a regression test which ends up with a kernel oops in btrfs. >+# It occurs when btrfs's read repair happens while reading a compressed >+# extent. >+# The patch for this is >+# x >+# >+#--- >+# Copyright (c) 2017 Liu Bo. All Rights Reserved. >+# >+# This program is free software; you can redistribute it and/or >+# modify it under the terms of the GNU General Public License as >+# published by the Free Software Foundation. >+# >+# This program is distributed in the hope that it would be useful, >+# but WITHOUT ANY WARRANTY; without even the implied warranty of >+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the >+# GNU General Public License for more details. >+# >+# You should have received a copy of the GNU General Public License >+# along with this program; if not, write the Free Software Foundation, >+# Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA >+#--- >+# >+ >+seq=`basename $0` >+seqres=$RESULT_DIR/$seq >+echo "QA output created by $seq" >+ >+here=`pwd` >+tmp=/tmp/$$ >+status=1 # failure is the default! >+trap "_cleanup; exit \$status" 0 1 2 3 15 >+ >+_cleanup() >+{ >+ cd / >+ rm -f $tmp.* >+} >+ >+# get standard environment, filters and checks >+. ./common/rc >+. 
./common/filter >+ >+# remove previous $seqres.full before test >+rm -f $seqres.full >+ >+# real QA test starts here >+ >+# Modify as appropriate. >+_supported_fs btrfs >+_supported_os Linux >+_require_scratch >+_require_fail_make_request >+_require_scratch_dev_pool 2 >+ >+SYSFS_BDEV=`_sysfs_dev $SCRATCH_DEV` >+enable_io_failure() >+{ >+echo 100 > $DEBUGFS_MNT/fail_make_request/probability >+echo 1000 > $DEBUGFS_MNT/fail_make_request/times What does 1000 mean? Enough failures? Why not set times to -1? >+echo 0 > $DEBUGFS_MNT/fail_make_request/verbose >+echo 1 > $SYSFS_BDEV/make-it-fail >+} >+ >+disable_io_failure() >+{ >+echo 0 > $SYSFS_BDEV/make-it-fail >+echo 0 > $DEBUGFS_MNT/fail_make_request/probability >+echo 0 > $DEBUGFS_MNT/fail_make_request/times >+} >+ >+_scratch_pool_mkfs "-d raid1 -b 1G" >> $seqres.full 2>&1 >+ >+# It doesn't matter which compression algorithm we use. >+_scratch_mount -ocompress >+ >+# Create a file with all data being compressed >+$XFS_IO_PROG -f -c "pwrite -W 0 8K" $SCRATCH_MNT/foobar | _filter_xfs_io >+ >+# Raid1 consists of two copies and btrfs decides which copy to read by >reader's >+# %pid. Now we inject errors to copy #1 and copy #0 is good. We want to read >+# the bad copy to trigger read-repair. >+while true; do >+ disable_io_failure >+ # invalidate the page cache >+ $XFS_IO_PROG -f -c "fadvise -d 0 128K" $SCRATCH_MNT/foobar | >_filter_xfs_io >+ >+ enable_io_failure >+ od -x $SCRATCH_MNT/foobar > /dev/null & >+ pid=$! 
>+ wait >+ [ $((pid % 2)) == 1 ] && break >+done >+ >+disable_io_failure >+ >+# success, all done >+status=0 >+exit >diff --git a/tests/btrfs/150.out b/tests/btrfs/150.out >new file mode 100644 >index 000..c492c24 >--- /dev/null >+++ b/tests/btrfs/150.out >@@ -0,0 +1,3 @@ >+QA output created by 150 >+wrote 8192/8192 bytes at offset 0 >+XXX Bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) >diff --git a/tests/btrfs/group b/tests/btrfs/group >index 70c3f05..b70a122 100644 >--- a/tests/btrfs/group >+++ b/tests/btrfs/group >@@ -152,3 +152,4 @@ > 147 auto quick send > 148 auto quick rw > 149 auto quick send compress >+150 auto quick >-- >2.5.0 > >-- >To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in >the body of a message to majord...@vger.kernel.org >More majordomo info at http://vger.kernel.org/majordomo-info.html > > -- Thanks, Lu -- To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
RE: SSD caching an existing btrfs raid1
> -Original Message- > From: linux-btrfs-ow...@vger.kernel.org [mailto:linux-btrfs- > ow...@vger.kernel.org] On Behalf Of Kai Krakow > Sent: Thursday, 21 September 2017 6:45 AM > To: linux-btrfs@vger.kernel.org > Subject: Re: SSD caching an existing btrfs raid1 > > Am Wed, 20 Sep 2017 17:51:15 +0200 > schrieb Psalle: > > > On 19/09/17 17:47, Austin S. Hemmelgarn wrote: > > (...) > > > > > > A better option if you can afford to remove a single device from > > > that array temporarily is to use bcache. Bcache has one specific > > > advantage in this case, multiple backend devices can share the same > > > cache device. This means you don't have to carve out dedicated cache > > > space for each disk on the SSD and leave some unused space so that > > > you can add new devices if needed. The downside is that you can't > > > convert each device in-place, but because you're using BTRFS, you > > > can still convert the volume as a whole in-place. The procedure for > > > doing so looks like this: > > > > > > 1. Format the SSD as a bcache cache. > > > 2. Use `btrfs device delete` to remove a single hard drive from the > > > array. > > > 3. Set up the drive you just removed as a bcache backing device > > > bound to the cache you created in step 1. > > > 4. Add the new bcache device to the array. > > > 5. Repeat from step 2 until the whole array is converted. > > > > > > A similar procedure can actually be used to do almost any underlying > > > storage conversion (for example, switching to whole disk encryption, > > > or adding LVM underneath BTRFS) provided all your data can fit on > > > one less disk than you have. > > > > Thanks Austin, that's just great. For some reason I had discarded > > bcache thinking that it would force me to rebuild from scratch, but > > this kind of incremental migration is exactly why I hoped was > > possible. I have plenty of space to replace the devices one by one. > > > > I will report back my experience in a few days, I hope. 
> > I've done it exactly that way in the past and it worked flawlessly (but it > took > 24+ hours). But it was easy for me because I was also adding a third disk to > the pool, so existing stuff could easily move. Device delete takes freaking ages! I would avoid using it if you can. Device replace is much faster. Paul.