RE: [PATCH] btrfs/ioctl.c: quiet sparse warnings
On Friday, September 23, 2011 11:16 AM, Joe Perches wrote: > On Fri, 2011-09-23 at 11:07 -0700, H Hartley Sweeten wrote: >> Quiet the following sparse warnings: > [] >> diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c > [] >> @@ -2705,7 +2705,7 @@ long btrfs_ioctl_space_info(struct btrfs_root *root, >> void __user *arg) >> up_read(&info->groups_sem); >> } >> >> -user_dest = (struct btrfs_ioctl_space_info *) >> +user_dest = (struct btrfs_ioctl_space_info __user *) >> (arg + sizeof(struct btrfs_ioctl_space_args)); > > user_dest = arg; > user_dest++; > > ? That produces a new sparse warning: fs/btrfs/ioctl.c: In function ‘btrfs_ioctl_space_info’: fs/btrfs/ioctl.c:2708: warning: ‘user_dest’ may be used uninitialized in this function I guess user_dest could be set at the start of the function. This would also remove the cast of arg in the first copy_from_user. Something like this: -- diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 970977a..9e7e5dc 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -2599,13 +2599,13 @@ static void get_block_group_info(struct list_head *groups_list, } } -long btrfs_ioctl_space_info(struct btrfs_root *root, void __user *arg) +static long btrfs_ioctl_space_info(struct btrfs_root *root, void __user *arg) { struct btrfs_ioctl_space_args space_args; struct btrfs_ioctl_space_info space; struct btrfs_ioctl_space_info *dest; struct btrfs_ioctl_space_info *dest_orig; - struct btrfs_ioctl_space_info __user *user_dest; + struct btrfs_ioctl_space_info __user *user_dest = arg; struct btrfs_space_info *info; u64 types[] = {BTRFS_BLOCK_GROUP_DATA, BTRFS_BLOCK_GROUP_SYSTEM, @@ -2617,9 +2617,7 @@ long btrfs_ioctl_space_info(struct btrfs_root *root, void __user *arg) u64 slot_count = 0; int i, c; - if (copy_from_user(&space_args, - (struct btrfs_ioctl_space_args __user *)arg, - sizeof(space_args))) + if (copy_from_user(&space_args, user_dest, sizeof(space_args))) return -EFAULT; for (i = 0; i < num_types; i++) { @@ -2705,8 +2703,7 @@ long btrfs_ioctl_space_info(struct btrfs_root *root, void __user *arg) up_read(&info->groups_sem); } - user_dest = (struct btrfs_ioctl_space_info *) - (arg + sizeof(struct btrfs_ioctl_space_args)); + user_dest++; if (copy_to_user(user_dest, dest_orig, alloc_size)) ret = -EFAULT;
Re: [PATCH] btrfs/extent_map.c: quiet sparse noise
On Thu, 22 Sep 2011 18:45:56 PDT, H Hartley Sweeten said: > Quiet the sparse noise: > > warning: symbol '__lookup_extent_mapping' was not declared. Should it be > static? Patch itself is correct, changelog is bad. You're not quieting sparse noise, you're making a declaration static because it doesn't need external visibility. pgp680pMsCuLa.pgp Description: PGP signature
[PATCH] btrfs/delayed-inode.c: quiet sparse noise
Quiet the following sparse noise: warning: symbol 'btrfs_first_delayed_node' was not declared. Should it be static? warning: symbol 'btrfs_next_delayed_node' was not declared. Should it be static? warning: symbol 'btrfs_first_prepared_delayed_node' was not declared. Should it be static? warning: symbol 'btrfs_alloc_delayed_item' was not declared. Should it be static? warning: symbol '__btrfs_lookup_delayed_insertion_item' was not declared. Should it be static? warning: symbol '__btrfs_lookup_delayed_deletion_item' was not declared. Should it be static? warning: symbol '__btrfs_search_delayed_insertion_item' was not declared. Should it be static? warning: symbol '__btrfs_search_delayed_deletion_item' was not declared. Should it be static? warning: symbol '__btrfs_first_delayed_insertion_item' was not declared. Should it be static? warning: symbol '__btrfs_first_delayed_deletion_item' was not declared. Should it be static? warning: symbol '__btrfs_next_delayed_item' was not declared. Should it be static? The functions __btrfs_lookup_delayed_deletion_item, __btrfs_search_delayed_insertion_item, and __btrfs_search_delayed_deletion_item are not currently being used by the code. They may be eventually, so make them inline to for to now make sparse happy and allow the linker to discard them. Signed-off-by: H Hartley Sweeten Cc: Chris Mason --- diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c index b52c672..5d3c397 100644 --- a/fs/btrfs/delayed-inode.c +++ b/fs/btrfs/delayed-inode.c @@ -199,8 +199,8 @@ static void btrfs_dequeue_delayed_node(struct btrfs_delayed_root *root, spin_unlock(&root->lock); } -struct btrfs_delayed_node *btrfs_first_delayed_node( - struct btrfs_delayed_root *delayed_root) +static struct btrfs_delayed_node * +btrfs_first_delayed_node(struct btrfs_delayed_root *delayed_root) { struct list_head *p; struct btrfs_delayed_node *node = NULL; @@ -218,8 +218,8 @@ out: return node; } -struct btrfs_delayed_node *btrfs_next_delayed_node( - struct btrfs_delayed_node *node) +static struct btrfs_delayed_node * +btrfs_next_delayed_node(struct btrfs_delayed_node *node) { struct btrfs_delayed_root *delayed_root; struct list_head *p; @@ -279,8 +279,8 @@ static inline void btrfs_release_delayed_node(struct btrfs_delayed_node *node) __btrfs_release_delayed_node(node, 0); } -struct btrfs_delayed_node *btrfs_first_prepared_delayed_node( - struct btrfs_delayed_root *delayed_root) +static struct btrfs_delayed_node * +btrfs_first_prepared_delayed_node(struct btrfs_delayed_root *delayed_root) { struct list_head *p; struct btrfs_delayed_node *node = NULL; @@ -305,7 +305,7 @@ static inline void btrfs_release_prepared_delayed_node( __btrfs_release_delayed_node(node, 1); } -struct btrfs_delayed_item *btrfs_alloc_delayed_item(u32 data_len) +static struct btrfs_delayed_item *btrfs_alloc_delayed_item(u32 data_len) { struct btrfs_delayed_item *item; item = kmalloc(sizeof(*item) + data_len, GFP_NOFS); @@ -380,9 +380,9 @@ static struct btrfs_delayed_item *__btrfs_lookup_delayed_item( return NULL; } -struct btrfs_delayed_item *__btrfs_lookup_delayed_insertion_item( - struct btrfs_delayed_node *delayed_node, - struct btrfs_key *key) +static struct btrfs_delayed_item * +__btrfs_lookup_delayed_insertion_item(struct btrfs_delayed_node *delayed_node, + struct btrfs_key *key) { struct btrfs_delayed_item *item; @@ -391,9 +391,9 @@ struct btrfs_delayed_item *__btrfs_lookup_delayed_insertion_item( return item; } -struct btrfs_delayed_item *__btrfs_lookup_delayed_deletion_item( - struct btrfs_delayed_node *delayed_node, - struct btrfs_key *key) +static inline struct btrfs_delayed_item * +__btrfs_lookup_delayed_deletion_item(struct btrfs_delayed_node *delayed_node, +struct btrfs_key *key) { struct btrfs_delayed_item *item; @@ -402,9 +402,9 @@ struct btrfs_delayed_item *__btrfs_lookup_delayed_deletion_item( return item; } -struct btrfs_delayed_item *__btrfs_search_delayed_insertion_item( - struct btrfs_delayed_node *delayed_node, - struct btrfs_key *key) +static inline struct btrfs_delayed_item * +__btrfs_search_delayed_insertion_item(struct btrfs_delayed_node *delayed_node, + struct btrfs_key *key) { struct btrfs_delayed_item *item, *next; @@ -416,9 +416,9 @@ struct btrfs_delayed_item *__btrfs_search_delayed_insertion_item( return item; } -struct btrfs_delayed_item *__btrfs_search_
[PATCH] btrfs/compression.c: quiet sparse noise
Quiet the sparse warning: warning: symbol 'btrfs_compress_op' was not declared. Should it be static? Signed-off-by: H Hartley Sweeten Cc: Chris Mason --- diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c index 8ec5d86..3e8c133 100644 --- a/fs/btrfs/compression.c +++ b/fs/btrfs/compression.c @@ -726,7 +726,7 @@ static int comp_num_workspace[BTRFS_COMPRESS_TYPES]; static atomic_t comp_alloc_workspace[BTRFS_COMPRESS_TYPES]; static wait_queue_head_t comp_workspace_wait[BTRFS_COMPRESS_TYPES]; -struct btrfs_compress_op *btrfs_compress_op[] = { +static struct btrfs_compress_op *btrfs_compress_op[] = { &btrfs_zlib_compress, &btrfs_lzo_compress, }; -- To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH] btrfs/free-space-cache.c: quiet sparse warnings
Quiet the following sparse warnings: warning: symbol '__create_free_space_inode' was not declared. Should it be static? warning: symbol '__load_free_space_cache' was not declared. Should it be static? warning: symbol '__btrfs_write_out_cache' was not declared. Should it be static? warning: symbol '__btrfs_remove_free_space_cache_locked' was not declared. Should it be static? warning: context imbalance in 'insert_into_bitmap' - unexpected unlock Signed-off-by: H Hartley Sweeten Cc: Chris Mason (maintainer:BTRFS FILE SYSTEM) --- diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c index 6a265b9..bdd5c0e 100644 --- a/fs/btrfs/free-space-cache.c +++ b/fs/btrfs/free-space-cache.c @@ -113,7 +113,7 @@ struct inode *lookup_free_space_inode(struct btrfs_root *root, return inode; } -int __create_free_space_inode(struct btrfs_root *root, +static int __create_free_space_inode(struct btrfs_root *root, struct btrfs_trans_handle *trans, struct btrfs_path *path, u64 ino, u64 offset) { @@ -238,7 +238,7 @@ static int readahead_cache(struct inode *inode) return 0; } -int __load_free_space_cache(struct btrfs_root *root, struct inode *inode, +static int __load_free_space_cache(struct btrfs_root *root, struct inode *inode, struct btrfs_free_space_ctl *ctl, struct btrfs_path *path, u64 offset) { @@ -526,7 +526,7 @@ out: return ret; } -int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode, +static int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode, struct btrfs_free_space_ctl *ctl, struct btrfs_block_group_cache *block_group, struct btrfs_trans_handle *trans, @@ -1433,6 +1433,8 @@ static struct btrfs_free_space_op free_space_op = { static int insert_into_bitmap(struct btrfs_free_space_ctl *ctl, struct btrfs_free_space *info) + __releases(&ctl->tree_lock) + __acquires(&ctl->tree_lock) { struct btrfs_free_space *bitmap_info; struct btrfs_block_group_cache *block_group = NULL; @@ -1842,7 +1844,8 @@ out: return 0; } -void __btrfs_remove_free_space_cache_locked(struct btrfs_free_space_ctl *ctl) +static void +__btrfs_remove_free_space_cache_locked(struct btrfs_free_space_ctl *ctl) { struct btrfs_free_space *info; struct rb_node *node; -- To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] btrfs/ioctl.c: quiet sparse warnings
On Fri, 2011-09-23 at 11:07 -0700, H Hartley Sweeten wrote: > Quiet the following sparse warnings: [] > diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c [] > @@ -2705,7 +2705,7 @@ long btrfs_ioctl_space_info(struct btrfs_root *root, > void __user *arg) > up_read(&info->groups_sem); > } > > - user_dest = (struct btrfs_ioctl_space_info *) > + user_dest = (struct btrfs_ioctl_space_info __user *) > (arg + sizeof(struct btrfs_ioctl_space_args)); user_dest = arg; user_dest++; ? -- To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH] btrfs/ioctl.c: quiet sparse warnings
Quiet the following sparse warnings: warning: cast removes address space of expression warning: incorrect type in assignment (different address spaces) expected struct btrfs_ioctl_space_info [noderef] *user_dest got struct btrfs_ioctl_space_info * warning: symbol 'btrfs_ioctl_space_info' was not declared. Should it be static? Signed-off-by: H Hartley Sweeten Cc: Chris Mason --- diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 970977a..a001af4 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -2599,7 +2599,7 @@ static void get_block_group_info(struct list_head *groups_list, } } -long btrfs_ioctl_space_info(struct btrfs_root *root, void __user *arg) +static long btrfs_ioctl_space_info(struct btrfs_root *root, void __user *arg) { struct btrfs_ioctl_space_args space_args; struct btrfs_ioctl_space_info space; @@ -2705,7 +2705,7 @@ long btrfs_ioctl_space_info(struct btrfs_root *root, void __user *arg) up_read(&info->groups_sem); } - user_dest = (struct btrfs_ioctl_space_info *) + user_dest = (struct btrfs_ioctl_space_info __user *) (arg + sizeof(struct btrfs_ioctl_space_args)); if (copy_to_user(user_dest, dest_orig, alloc_size)) -- To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: Btrfs bug with g38867a2 and a question
On Fri, Sep 23, 2011 at 11:34:40AM -0400, Josef Bacik wrote: > Yeah this is a different problem that's fixed upstream, so reboot into > your other newer kernel with -o clear_cache. Thanks, Ok I'm back in business now, thanks... Now I'll try to understand why my pc sometimes hangs doing write IOs... -- Mathieu Chouquet-Stringer math...@csetco.com The sun itself sees not till heaven clears. -- William Shakespeare -- -- To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: Btrfs bug with g38867a2 and a question
On 09/23/2011 11:31 AM, Mathieu Chouquet-Stringer wrote: > On Fri, Sep 23, 2011 at 10:49:22AM -0400, Josef Bacik wrote: >> Ok I have no idea how this could happen. Can you mount -o clear_cache >> and see if it's just the cache that's bad? Thanks, > > Did that and got this (it's a never ending story, this is from a F16 > alpha boot cd hence stack trace could be different): > Yeah this is a different problem that's fixed upstream, so reboot into your other newer kernel with -o clear_cache. Thanks, Josef -- To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: Btrfs bug with g38867a2 and a question
On Fri, Sep 23, 2011 at 10:49:22AM -0400, Josef Bacik wrote: > Ok I have no idea how this could happen. Can you mount -o clear_cache > and see if it's just the cache that's bad? Thanks, Did that and got this (it's a never ending story, this is from a F16 alpha boot cd hence stack trace could be different): [ 512.455253] [ cut here ] [ 512.455464] kernel BUG at fs/btrfs/inode.c:4586! [ 512.455662] invalid opcode: [#1] SMP [ 512.455874] CPU 1 [ 512.455879] Modules linked in: btrfs zlib_deflate libcrc32c xts lrw gf128mul sha256_generic dm_crypt dm_round_robin dm_multipath linear raid10 raid456 async_raid6_recov async_pq raid6_pq async_xor xor async_memcpy async_tx raid1 raid0 iscsi_ibft iscsi_boot_sysfs pcspkr edd iscsi_tcp libiscsi_tcp libiscsi scsi_transport_iscsi cramfs arc4 firewire_ohci firewire_core sdhci_pci sdhci yenta_socket crc_itu_t mmc_core iwl4965 iwl_legacy mac80211 cfg80211 rfkill nouveau ttm drm_kms_helper drm i2c_algo_bit i2c_core mxm_wmi wmi video e1000e squashfs [ 512.456018] [ 512.456018] Pid: 1349, comm: mount Not tainted 3.0.0-1.fc16.x86_64 #1 LENOVO 6458V5C/6458V5C [ 512.456018] RIP: 0010:[] [] btrfs_add_link+0x123/0x17c [btrfs] [ 512.456018] RSP: 0018:880123e09848 EFLAGS: 00010282 [ 512.456018] RAX: ffef RBX: 8801194c9d78 RCX: 0046 [ 512.456018] RDX: 0127 RSI: 88012aa7aaf0 RDI: 0282 [ 512.456018] RBP: 880123e098b8 R08: 8801228ac000 R09: 005a [ 512.456018] R10: 880123e096d8 R11: 88013b4023c0 R12: 8801194ca610 [ 512.456018] R13: 8801165d2090 R14: 000b R15: 8801181fd630 [ 512.456018] FS: 7f7bb600a820() GS:88013bc0() knlGS: [ 512.456018] CS: 0010 DS: ES: CR0: 8005003b [ 512.456018] CR2: 7f7235313768 CR3: 000113f09000 CR4: 06e0 [ 512.456018] DR0: DR1: DR2: [ 512.456018] DR3: DR6: 0ff0 DR7: 0400 [ 512.456018] Process mount (pid: 1349, threadinfo 880123e08000, task 88012aa7a3e0) [ 512.456018] Stack: [ 512.456018] 88010001 00018028 880123e09888 8801194bb000 [ 512.456018] 5aff8801165b3000 01001537 [ 512.456018] 1000 8801194ba1b0 8801194ca610 880123e099d7 [ 512.456018] Call Trace: [ 512.456018] [] add_inode_ref+0x2e6/0x37c [btrfs] [ 512.456018] [] ? read_extent_buffer+0xc3/0xe3 [btrfs] [ 512.456018] [] replay_one_buffer+0x197/0x212 [btrfs] [ 512.456018] [] walk_up_log_tree+0xe4/0x1aa [btrfs] [ 512.456018] [] ? replay_one_dir_item+0xbd/0xbd [btrfs] [ 512.456018] [] walk_log_tree+0x9e/0x19e [btrfs] [ 512.456018] [] btrfs_recover_log_trees+0x28b/0x298 [btrfs] [ 512.456018] [] ? replay_one_dir_item+0xbd/0xbd [btrfs] [ 512.456018] [] open_ctree+0x11aa/0x14b8 [btrfs] [ 512.456018] [] btrfs_mount+0x233/0x498 [btrfs] [ 512.456018] [] ? free_pages+0x47/0x4c [ 512.456018] [] mount_fs+0x69/0x155 [ 512.456018] [] ? __alloc_percpu+0x10/0x12 [ 512.456018] [] vfs_kern_mount+0x63/0xa0 [ 512.456018] [] do_kern_mount+0x4d/0xdf [ 512.456018] [] do_mount+0x63c/0x69f [ 512.456018] [] sys_mount+0x88/0xc2 [ 512.456018] [] system_call_fastpath+0x16/0x1b [ 512.456018] Code: 89 f1 4c 89 fa 4c 89 ee 48 89 44 24 08 41 8b 04 24 66 c1 e8 0c 83 e0 0f 0f b6 80 b8 bd 39 a0 89 04 24 e8 db cf fe ff 85 c0 74 02 <0f> 0b 45 01 f6 4d 63 f6 4c 03 b3 a0 01 00 00 4c 89 b3 a0 01 00 [ 512.456018] RIP [] btrfs_add_link+0x123/0x17c [btrfs] [ 512.456018] RSP [ 512.485056] ---[ end trace cea880cef8a5d83b ]--- -- Mathieu Chouquet-Stringer math...@csetco.com The sun itself sees not till heaven clears. -- William Shakespeare -- -- To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: Btrfs bug with g38867a2 and a question
On 09/23/2011 08:55 AM, Mathieu Chouquet-Stringer wrote: > On Thu, Sep 22, 2011 at 10:32:13PM +0200, Mathieu Chouquet-Stringer wrote: >> On Thu, Sep 22, 2011 at 09:30:07PM +0200, Mathieu Chouquet-Stringer wrote: >>> On Thu, Sep 22, 2011 at 03:00:03PM -0400, Josef Bacik wrote: Oh wow sorry I sent you the completely wrong patch, I wish I had caught your reply earlier. Can you run with this patch, which is the one I meant to give you :). Thanks, >>> >>> No worries, I've applied your patch (seems your thunderbird mangled line >>> returns) and I'm rebooting... >> >> There: >> http://mathieu.csetco.com/btrfs/btrfs-screenshots.tar > > Just realised I truncated the first line! It should have been: > > Couldn't find in bitmap offset=20737413120, bytes=57344, search=20765691904, > search_bytes=65536, ret=0 Ok I have no idea how this could happen. Can you mount -o clear_cache and see if it's just the cache that's bad? Thanks, Josef -- To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[patch 2/2/4] mm: try to distribute dirty pages fairly across zones
The maximum number of dirty pages that exist in the system at any time is determined by a number of pages considered dirtyable and a user-configured percentage of those, or an absolute number in bytes. This number of dirtyable pages is the sum of memory provided by all the zones in the system minus their lowmem reserves and high watermarks, so that the system can retain a healthy number of free pages without having to reclaim dirty pages. But there is a flaw in that we have a zoned page allocator which does not care about the global state but rather the state of individual memory zones. And right now there is nothing that prevents one zone from filling up with dirty pages while other zones are spared, which frequently leads to situations where kswapd, in order to restore the watermark of free pages, does indeed have to write pages from that zone's LRU list. This can interfere so badly with IO from the flusher threads that major filesystems (btrfs, xfs, ext4) mostly ignore write requests from reclaim already, taking away the VM's only possibility to keep such a zone balanced, aside from hoping the flushers will soon clean pages from that zone. Enter per-zone dirty limits. They are to a zone's dirtyable memory what the global limit is to the global amount of dirtyable memory, and try to make sure that no single zone receives more than its fair share of the globally allowed dirty pages in the first place. As the number of pages considered dirtyable exclude the zones' lowmem reserves and high watermarks, the maximum number of dirty pages in a zone is such that the zone can always be balanced without requiring page cleaning. As this is a placement decision in the page allocator and pages are dirtied only after the allocation, this patch allows allocators to pass __GFP_WRITE when they know in advance that the page will be written to and become dirty soon. The page allocator will then attempt to allocate from the first zone of the zonelist - which on NUMA is determined by the task's NUMA memory policy - that has not exceeded its dirty limit. At first glance, it would appear that the diversion to lower zones can increase pressure on them, but this is not the case. With a full high zone, allocations will be diverted to lower zones eventually, so it is more of a shift in timing of the lower zone allocations. Workloads that previously could fit their dirty pages completely in the higher zone may be forced to allocate from lower zones, but the amount of pages that 'spill over' are limited themselves by the lower zones' dirty constraints, and thus unlikely to become a problem. For now, the problem of unfair dirty page distribution remains for NUMA configurations where the zones allowed for allocation are in sum not big enough to trigger the global dirty limits, wake up the flusher threads and remedy the situation. Because of this, an allocation that could not succeed on any of the considered zones is allowed to ignore the dirty limits before going into direct reclaim or even failing the allocation, until a future patch changes the global dirty throttling and flusher thread activation so that they take individual zone states into account. Signed-off-by: Johannes Weiner --- include/linux/gfp.h |4 ++- include/linux/writeback.h |1 + mm/page-writeback.c | 83 + mm/page_alloc.c | 29 4 files changed, 116 insertions(+), 1 deletions(-) diff --git a/include/linux/gfp.h b/include/linux/gfp.h index 3a76faf..50efc7e 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@ -36,6 +36,7 @@ struct vm_area_struct; #endif #define ___GFP_NO_KSWAPD 0x40u #define ___GFP_OTHER_NODE 0x80u +#define ___GFP_WRITE 0x100u /* * GFP bitmasks.. @@ -85,6 +86,7 @@ struct vm_area_struct; #define __GFP_NO_KSWAPD((__force gfp_t)___GFP_NO_KSWAPD) #define __GFP_OTHER_NODE ((__force gfp_t)___GFP_OTHER_NODE) /* On behalf of other node */ +#define __GFP_WRITE((__force gfp_t)___GFP_WRITE) /* Allocator intends to dirty page */ /* * This may seem redundant, but it's a way of annotating false positives vs. @@ -92,7 +94,7 @@ struct vm_area_struct; */ #define __GFP_NOTRACK_FALSE_POSITIVE (__GFP_NOTRACK) -#define __GFP_BITS_SHIFT 24/* Room for N __GFP_FOO bits */ +#define __GFP_BITS_SHIFT 25/* Room for N __GFP_FOO bits */ #define __GFP_BITS_MASK ((__force gfp_t)((1 << __GFP_BITS_SHIFT) - 1)) /* This equals 0, but use constants in case they ever change */ diff --git a/include/linux/writeback.h b/include/linux/writeback.h index a5f495f..c96ee0c 100644 --- a/include/linux/writeback.h +++ b/include/linux/writeback.h @@ -104,6 +104,7 @@ void laptop_mode_timer_fn(unsigned long data); static inline void laptop_sync_completion(void) { } #endif void throttle_vm_writeout(gfp_t gfp_mask); +bool zone_dirty_ok(struct zone *zone); extern unsigned long global_dirty_limit; d
[patch 1/2/4] mm: writeback: cleanups in preparation for per-zone dirty limits
On Thu, Sep 22, 2011 at 10:52:42AM +0200, Johannes Weiner wrote: > On Wed, Sep 21, 2011 at 04:02:26PM -0700, Andrew Morton wrote: > > Should we rename determine_dirtyable_memory() to > > global_dirtyable_memory(), to get some sense of its relationship with > > zone_dirtyable_memory()? > > Sounds good. --- The next patch will introduce per-zone dirty limiting functions in addition to the traditional global dirty limiting. Rename determine_dirtyable_memory() to global_dirtyable_memory() before adding the zone-specific version, and fix up its documentation. Also, move the functions to determine the dirtyable memory and the function to calculate the dirty limit based on that together so that their relationship is more apparent and that they can be commented on as a group. Signed-off-by: Johannes Weiner --- mm/page-writeback.c | 92 +- 1 files changed, 46 insertions(+), 46 deletions(-) diff --git a/mm/page-writeback.c b/mm/page-writeback.c index c8acf8a..78604a6 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -186,12 +186,12 @@ static unsigned long highmem_dirtyable_memory(unsigned long total) } /** - * determine_dirtyable_memory - amount of memory that may be used + * global_dirtyable_memory - number of globally dirtyable pages * - * Returns the numebr of pages that can currently be freed and used - * by the kernel for direct mappings. + * Returns the global number of pages potentially available for dirty + * page cache. This is the base value for the global dirty limits. */ -static unsigned long determine_dirtyable_memory(void) +static unsigned long global_dirtyable_memory(void) { unsigned long x; @@ -205,6 +205,47 @@ static unsigned long determine_dirtyable_memory(void) } /* + * global_dirty_limits - background-writeback and dirty-throttling thresholds + * + * Calculate the dirty thresholds based on sysctl parameters + * - vm.dirty_background_ratio or vm.dirty_background_bytes + * - vm.dirty_ratio or vm.dirty_bytes + * The dirty limits will be lifted by 1/4 for PF_LESS_THROTTLE (ie. nfsd) and + * real-time tasks. + */ +void global_dirty_limits(unsigned long *pbackground, unsigned long *pdirty) +{ + unsigned long background; + unsigned long dirty; + unsigned long uninitialized_var(available_memory); + struct task_struct *tsk; + + if (!vm_dirty_bytes || !dirty_background_bytes) + available_memory = global_dirtyable_memory(); + + if (vm_dirty_bytes) + dirty = DIV_ROUND_UP(vm_dirty_bytes, PAGE_SIZE); + else + dirty = (vm_dirty_ratio * available_memory) / 100; + + if (dirty_background_bytes) + background = DIV_ROUND_UP(dirty_background_bytes, PAGE_SIZE); + else + background = (dirty_background_ratio * available_memory) / 100; + + if (background >= dirty) + background = dirty / 2; + tsk = current; + if (tsk->flags & PF_LESS_THROTTLE || rt_task(tsk)) { + background += background / 4; + dirty += dirty / 4; + } + *pbackground = background; + *pdirty = dirty; + trace_global_dirty_state(background, dirty); +} + +/* * couple the period to the dirty_ratio: * * period/2 ~ roundup_pow_of_two(dirty limit) @@ -216,7 +257,7 @@ static int calc_period_shift(void) if (vm_dirty_bytes) dirty_total = vm_dirty_bytes / PAGE_SIZE; else - dirty_total = (vm_dirty_ratio * determine_dirtyable_memory()) / + dirty_total = (vm_dirty_ratio * global_dirtyable_memory()) / 100; return 2 + ilog2(dirty_total - 1); } @@ -416,47 +457,6 @@ static unsigned long hard_dirty_limit(unsigned long thresh) return max(thresh, global_dirty_limit); } -/* - * global_dirty_limits - background-writeback and dirty-throttling thresholds - * - * Calculate the dirty thresholds based on sysctl parameters - * - vm.dirty_background_ratio or vm.dirty_background_bytes - * - vm.dirty_ratio or vm.dirty_bytes - * The dirty limits will be lifted by 1/4 for PF_LESS_THROTTLE (ie. nfsd) and - * real-time tasks. - */ -void global_dirty_limits(unsigned long *pbackground, unsigned long *pdirty) -{ - unsigned long background; - unsigned long dirty; - unsigned long uninitialized_var(available_memory); - struct task_struct *tsk; - - if (!vm_dirty_bytes || !dirty_background_bytes) - available_memory = determine_dirtyable_memory(); - - if (vm_dirty_bytes) - dirty = DIV_ROUND_UP(vm_dirty_bytes, PAGE_SIZE); - else - dirty = (vm_dirty_ratio * available_memory) / 100; - - if (dirty_background_bytes) - background = DIV_ROUND_UP(dirty_background_bytes, PAGE_SIZE); - else - background = (dirty_background_ratio * available_mem
[patch 1/4 v2] mm: exclude reserved pages from dirtyable memory
The amount of dirtyable pages should not include the full number of free pages: there is a number of reserved pages that the page allocator and kswapd always try to keep free. The closer (reclaimable pages - dirty pages) is to the number of reserved pages, the more likely it becomes for reclaim to run into dirty pages: +--+ --- | anon | | +--+ | | | | | | -- dirty limit new-- flusher new | file | | | | | | | | | -- dirty limit old-- flusher old | || +--+ --- reclaim | reserved | +--+ | kernel | +--+ This patch introduces a per-zone dirty reserve that takes both the lowmem reserve as well as the high watermark of the zone into account, and a global sum of those per-zone values that is subtracted from the global amount of dirtyable pages. The lowmem reserve is unavailable to page cache allocations and kswapd tries to keep the high watermark free. We don't want to end up in a situation where reclaim has to clean pages in order to balance zones. Not treating reserved pages as dirtyable on a global level is only a conceptual fix. In reality, dirty pages are not distributed equally across zones and reclaim runs into dirty pages on a regular basis. But it is important to get this right before tackling the problem on a per-zone level, where the distance between reclaim and the dirty pages is mostly much smaller in absolute numbers. Signed-off-by: Johannes Weiner --- include/linux/mmzone.h |6 ++ include/linux/swap.h |1 + mm/page-writeback.c|6 -- mm/page_alloc.c| 19 +++ 4 files changed, 30 insertions(+), 2 deletions(-) diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 1ed4116..37a61e7 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -317,6 +317,12 @@ struct zone { */ unsigned long lowmem_reserve[MAX_NR_ZONES]; + /* +* This is a per-zone reserve of pages that should not be +* considered dirtyable memory. +*/ + unsigned long dirty_balance_reserve; + #ifdef CONFIG_NUMA int node; /* diff --git a/include/linux/swap.h b/include/linux/swap.h index b156e80..9021453 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -209,6 +209,7 @@ struct swap_list_t { /* linux/mm/page_alloc.c */ extern unsigned long totalram_pages; extern unsigned long totalreserve_pages; +extern unsigned long dirty_balance_reserve; extern unsigned int nr_free_buffer_pages(void); extern unsigned int nr_free_pagecache_pages(void); diff --git a/mm/page-writeback.c b/mm/page-writeback.c index da6d263..c8acf8a 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -170,7 +170,8 @@ static unsigned long highmem_dirtyable_memory(unsigned long total) &NODE_DATA(node)->node_zones[ZONE_HIGHMEM]; x += zone_page_state(z, NR_FREE_PAGES) + -zone_reclaimable_pages(z); +zone_reclaimable_pages(z) - +zone->dirty_balance_reserve; } /* * Make sure that the number of highmem pages is never larger @@ -194,7 +195,8 @@ static unsigned long determine_dirtyable_memory(void) { unsigned long x; - x = global_page_state(NR_FREE_PAGES) + global_reclaimable_pages(); + x = global_page_state(NR_FREE_PAGES) + global_reclaimable_pages() - + dirty_balance_reserve; if (!vm_highmem_is_dirtyable) x -= highmem_dirtyable_memory(x); diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 1dba05e..f8cba89 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -96,6 +96,14 @@ EXPORT_SYMBOL(node_states); unsigned long totalram_pages __read_mostly; unsigned long totalreserve_pages __read_mostly; +/* + * When calculating the number of globally allowed dirty pages, there + * is a certain number of per-zone reserves that should not be + * considered dirtyable memory. This is the sum of those reserves + * over all existing zones that contribute dirtyable memory. + */ +unsigned long dirty_balance_reserve __read_mostly; + int percpu_pagelist_fraction; gfp_t gfp_allowed_mask __read_mostly = GFP_BOOT_MASK; @@ -5076,8 +5084,19 @@ static void calculate_totalreserve_pages(void) if (max > zone->present_pages) max = zone->present_pages; reserve_pages += max; + /* +* Lowmem reserves are not available to +* GFP_HIGHUSER page cache allocations and +* kswapd tries to balance zones to their high +* watermark. As a result, neither
Re: [PATCH] xfstests: add new getdents test
On Fri, 2011-09-23 at 16:03 +0300, Grazvydas Ignotas wrote: > On Thu, Sep 22, 2011 at 11:18 PM, Alex Elder wrote: > > On Mon, 2011-09-12 at 03:19 +0300, Grazvydas Ignotas wrote: > >> The test checks if no duplicate d_off values are returned and that > >> those values are seekable to the right inodes. > >> > >> Signed-off-by: Grazvydas Ignotas > > > > I have two minor comments on the C program below, > > but even if you don't want to address them this > > looks good. > > > > Reviewed-by: Alex Elder > > . . . > >> +static uint64_t d_off_histoty[HISTORY_LEN]; > >> +static uint64_t d_ino_histoty[HISTORY_LEN]; > > > > Is "histoty" intentional or a typo? > > whoops, it's a typo. I might send a patch for this later. I will change this for you before committing. > > > >> +int > >> +main(int argc, char *argv[]) . . . > >> + nread = syscall(SYS_getdents64, fd, buf, BUF_SIZE); > > > > You could just use sizeof (struct linux_dirent_64) rather than > > BUF_SIZE here. I suppose it doesn't hurt but there's no real > > sense in reading more than the one you're going to look at. > > I'm not sure if reading partial entry is allowed, manpage says it may > fail with EINVAL if buffer size is too small.. I will keep this as-is. I was wrong about the size you should pass, but my point still stands... For a single entry you need to pass a buffer big enough to hold a linux_dirent64 structure *plus* the maximum-sized name that entry could contain. That appears to be 256 bytes, though POSIX allows more. Anyway, if you or anyone else wants to try to change this in the future to read in a smaller amount here, that's fine but it's not necessary now. Bottom line, I'll make the change I mentioned above and will commit the result. -Alex -- To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: Honest timeline for btrfsck
Chris, Now that you're back from vacation, I was wondering if you would be able to provide a revised estimate on how long this will take. Also, of the four parts, which will be necessary to correct a 'parent transid verify failed' error? Thank you for your time, --Erik On Thu, Aug 18, 2011 at 1:50 PM, Chris Mason wrote: > Excerpts from Yalonda Gishtaka's message of 2011-08-17 21:09:37 -0400: >> Chris Mason oracle.com> writes: >> >> > >> > Aside from making sure the kernel code is stable, btrfsck is all I'm >> > working on right now. I do expect a release in the next two weeks that >> > can recover your data (and many others). >> > >> > Thanks, >> > Chris >> > -- >> >> >> Chris, >> >> We're all on the edge of our seats. Can you provide an updated ETA on the >> release of the first functional btrfsck tool? No pressure or anything ;) > > Hi everyone, > > I've been working non-stop on this. Currently fsck has four parts: > > 1) mount -o recovery mode. I've posted smaller forms of these patches > in the past that bypass log tree replay. The new versions have code to > create stub roots for trees that can't be read (like the extent > allocation tree) and will allow the mount to proceed. > > 2) fsck that scans for older roots. This takes advantage of older > copies of metadata to look for consistent tree roots on disk. The > downside is that it is currently very slow. I'm trying to speed it up > by limiting the search to only the metadata block groups and a few other > tricks. > > 3) fsck that fixes the extent allocation tree and the chunk tree. This > is where I've been spending most of my time. The problem is that it > tends to recover some filesystems and badly break others. While I'm > fixing up the corner cases that work poorly, I'm adding an undo log to > the fsck code so that you can get the FS back into its original state if > you don't like the result of the fsck. > > 4) The rest of the corruptions can be dealt with fairly well from the > kernel. I have a series of patches to make the extent allocation tree > less strict about reference counts and other rules, basically allowing > the FS to limp along instead of crash. > > These four things together are basically my minimal set of features > required for fedora and our own internal projects at Oracle to start > treating us as production filesystem. > > There are always bugs to fix, and I have #1 and #2 mostly ready. I had > hoped to get #1 out the door before I left on vacation and I still might > post it tonight. > > -chris > -- > To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in > the body of a message to majord...@vger.kernel.org > More majordomo info at http://vger.kernel.org/majordomo-info.html > -- To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] xfstests: add new getdents test
On Thu, Sep 22, 2011 at 11:18 PM, Alex Elder wrote: > On Mon, 2011-09-12 at 03:19 +0300, Grazvydas Ignotas wrote: >> The test checks if no duplicate d_off values are returned and that >> those values are seekable to the right inodes. >> >> Signed-off-by: Grazvydas Ignotas > > I have two minor comments on the C program below, > but even if you don't want to address them this > looks good. > > Reviewed-by: Alex Elder > > . . . > >> +#include >> + >> +struct linux_dirent64 { >> + uint64_t d_ino; >> + uint64_t d_off; >> + unsigned short d_reclen; >> + unsigned char d_type; >> + char d_name[0]; >> +}; >> + >> +#define BUF_SIZE 4096 >> +#define HISTORY_LEN 1024 >> + >> +static uint64_t d_off_histoty[HISTORY_LEN]; >> +static uint64_t d_ino_histoty[HISTORY_LEN]; > > Is "histoty" intentional or a typo? whoops, it's a typo. I might send a patch for this later. > >> +int >> +main(int argc, char *argv[]) >> +{ >> + int fd, nread; >> + char buf[BUF_SIZE]; > > . . . > >> + >> + /* check if seek works correctly */ >> + d = (struct linux_dirent64 *)buf; >> + for (i = total - 1; i >= 0; i--) >> + { >> + lret = lseek(fd, i > 0 ? d_off_histoty[i - 1] : 0, SEEK_SET); >> + if (lret == -1) { >> + perror("lseek"); >> + exit(EXIT_FAILURE); >> + } >> + >> + nread = syscall(SYS_getdents64, fd, buf, BUF_SIZE); > > You could just use sizeof (struct linux_dirent_64) rather than > BUF_SIZE here. I suppose it doesn't hurt but there's no real > sense in reading more than the one you're going to look at. I'm not sure if reading partial entry is allowed, manpage says it may fail with EINVAL if buffer size is too small.. -- Gražvydas -- To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: Btrfs bug with g38867a2 and a question
On Thu, Sep 22, 2011 at 10:32:13PM +0200, Mathieu Chouquet-Stringer wrote: > On Thu, Sep 22, 2011 at 09:30:07PM +0200, Mathieu Chouquet-Stringer wrote: > > On Thu, Sep 22, 2011 at 03:00:03PM -0400, Josef Bacik wrote: > > > Oh wow sorry I sent you the completely wrong patch, I wish I had caught > > > your reply earlier. Can you run with this patch, which is the one I > > > meant to give you :). Thanks, > > > > No worries, I've applied your patch (seems your thunderbird mangled line > > returns) and I'm rebooting... > > There: > http://mathieu.csetco.com/btrfs/btrfs-screenshots.tar Just realised I truncated the first line! It should have been: Couldn't find in bitmap offset=20737413120, bytes=57344, search=20765691904, search_bytes=65536, ret=0 -- Mathieu Chouquet-Stringer math...@csetco.com The sun itself sees not till heaven clears. -- William Shakespeare -- -- To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html