Re: [f2fs-dev] [PATCH] f2fs: avoid crash when trace f2fs_submit_page_mbio event in ra_sum_pages
Hi Kim, -Original Message- From: Jaegeuk Kim [mailto:jaeg...@kernel.org] Sent: Wednesday, May 21, 2014 11:37 AM To: Chao Yu Cc: Jaegeuk Kim; linux-fsde...@vger.kernel.org; linux-ker...@vger.kernel.org; linux-f2fs-devel@lists.sourceforge.net Subject: Re: [f2fs-dev] [PATCH] f2fs: avoid crash when trace f2fs_submit_page_mbio event in ra_sum_pages Hi Chao, 2014-05-16 (금), 17:14 +0800, Chao Yu: Previously we allocate pages with no mapping in ra_sum_pages(), so we may encounter a crash in event trace of f2fs_submit_page_mbio where we access mapping data of the page. We'd better allocate pages in bd_inode mapping and invalidate these pages after we restore data from pages. It could avoid crash in above scenario. Call Trace: [f1031630] ? ftrace_raw_event_f2fs_write_checkpoint+0x80/0x80 [f2fs] [f10377bb] f2fs_submit_page_mbio+0x1cb/0x200 [f2fs] [f103c5da] restore_node_summary+0x13a/0x280 [f2fs] [f103e22d] build_curseg+0x2bd/0x620 [f2fs] [f104043b] build_segment_manager+0x1cb/0x920 [f2fs] [f1032c85] f2fs_fill_super+0x535/0x8e0 [f2fs] [c115b66a] mount_bdev+0x16a/0x1a0 [f102f63f] f2fs_mount+0x1f/0x30 [f2fs] [c115c096] mount_fs+0x36/0x170 [c1173635] vfs_kern_mount+0x55/0xe0 [c1175388] do_mount+0x1e8/0x900 [c1175d72] SyS_mount+0x82/0xc0 [c16059cc] sysenter_do_call+0x12/0x22 Signed-off-by: Chao Yu chao2...@samsung.com --- fs/f2fs/node.c | 49 - 1 file changed, 28 insertions(+), 21 deletions(-) diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 3d60d3d..b5cd814 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -1658,13 +1658,16 @@ int recover_inode_page(struct f2fs_sb_info *sbi, struct page *page) /* * ra_sum_pages() merge contiguous pages into one bio and submit. - * these pre-readed pages are linked in pages list. + * these pre-readed pages are alloced in bd_inode's mapping tree. 
*/ -static int ra_sum_pages(struct f2fs_sb_info *sbi, struct list_head *pages, +static int ra_sum_pages(struct f2fs_sb_info *sbi, struct page **pages, int start, int nrpages) { struct page *page; + struct inode *inode = sbi-sb-s_bdev-bd_inode; + struct address_space *mapping = inode-i_mapping; int page_idx = start; + int alloced, readed; struct f2fs_io_info fio = { .type = META, .rw = READ_SYNC | REQ_META | REQ_PRIO @@ -1672,21 +1675,23 @@ static int ra_sum_pages(struct f2fs_sb_info *sbi, struct list_head *pages, for (; page_idx start + nrpages; page_idx++) { /* alloc temporal page for read node summary info*/ - page = alloc_page(GFP_F2FS_ZERO); + page = grab_cache_page(mapping, page_idx); if (!page) break; - - lock_page(page); - page-index = page_idx; - list_add_tail(page-lru, pages); + page_cache_release(page); IMO, we don't need to do like this. Instead, for() { page = grab_cache_page(); if (!page) break; page[page_idx] = page; f2fs_submit_page_mbio(sbi, page, fio); } f2fs_submit_merged_bio(sbi, META, READ); return page_idx - start; Agreed, it helps to remove a lot of redundant code here. Thanks! Afterwards, in restore_node_summry(), lock_page() will wait the end_io for read. ... 
f2fs_put_page(pages[index], 1); Thanks, } - list_for_each_entry(page, pages, lru) - f2fs_submit_page_mbio(sbi, page, page->index, fio); + alloced = page_idx - start; + readed = find_get_pages_contig(mapping, start, alloced, pages); + BUG_ON(alloced != readed); + + for (page_idx = 0; page_idx < readed; page_idx++) + f2fs_submit_page_mbio(sbi, pages[page_idx], + pages[page_idx]->index, fio); f2fs_submit_merged_bio(sbi, META, READ); - return page_idx - start; + return readed; }
Re: [f2fs-dev] [PATCH] f2fs: avoid crash when trace f2fs_submit_page_mbio event in ra_sum_pages
On Mon, May 26, 2014 at 02:26:24PM +0800, Chao Yu wrote: Hi changman, -Original Message- From: Changman Lee [mailto:cm224@samsung.com] Sent: Friday, May 23, 2014 1:14 PM To: Jaegeuk Kim Cc: Chao Yu; linux-fsde...@vger.kernel.org; linux-ker...@vger.kernel.org; linux-f2fs-devel@lists.sourceforge.net Subject: Re: [f2fs-dev] [PATCH] f2fs: avoid crash when trace f2fs_submit_page_mbio event in ra_sum_pages On Wed, May 21, 2014 at 12:36:46PM +0900, Jaegeuk Kim wrote: Hi Chao, 2014-05-16 (금), 17:14 +0800, Chao Yu: Previously we allocate pages with no mapping in ra_sum_pages(), so we may encounter a crash in event trace of f2fs_submit_page_mbio where we access mapping data of the page. We'd better allocate pages in bd_inode mapping and invalidate these pages after we restore data from pages. It could avoid crash in above scenario. Call Trace: [f1031630] ? ftrace_raw_event_f2fs_write_checkpoint+0x80/0x80 [f2fs] [f10377bb] f2fs_submit_page_mbio+0x1cb/0x200 [f2fs] [f103c5da] restore_node_summary+0x13a/0x280 [f2fs] [f103e22d] build_curseg+0x2bd/0x620 [f2fs] [f104043b] build_segment_manager+0x1cb/0x920 [f2fs] [f1032c85] f2fs_fill_super+0x535/0x8e0 [f2fs] [c115b66a] mount_bdev+0x16a/0x1a0 [f102f63f] f2fs_mount+0x1f/0x30 [f2fs] [c115c096] mount_fs+0x36/0x170 [c1173635] vfs_kern_mount+0x55/0xe0 [c1175388] do_mount+0x1e8/0x900 [c1175d72] SyS_mount+0x82/0xc0 [c16059cc] sysenter_do_call+0x12/0x22 Signed-off-by: Chao Yu chao2...@samsung.com --- fs/f2fs/node.c | 49 - 1 file changed, 28 insertions(+), 21 deletions(-) diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 3d60d3d..b5cd814 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -1658,13 +1658,16 @@ int recover_inode_page(struct f2fs_sb_info *sbi, struct page *page) /* * ra_sum_pages() merge contiguous pages into one bio and submit. - * these pre-readed pages are linked in pages list. + * these pre-readed pages are alloced in bd_inode's mapping tree. 
*/ -static int ra_sum_pages(struct f2fs_sb_info *sbi, struct list_head *pages, +static int ra_sum_pages(struct f2fs_sb_info *sbi, struct page **pages, int start, int nrpages) { struct page *page; + struct inode *inode = sbi-sb-s_bdev-bd_inode; How about use sbi-meta_inode instead of bd_inode, then we can do caching summary pages for further i/o. In my understanding, In ra_sum_pages() we readahead node pages in NODE segment, then we could padding current summary caching with nid of node page's footer. So we should not cache this readaheaded pages in meta_inode's mapping. Do I miss something? Regards Sorry, you're right. Forget about caching. I've confused ra_sum_pages with summary segments. + struct address_space *mapping = inode-i_mapping; int page_idx = start; + int alloced, readed; struct f2fs_io_info fio = { .type = META, .rw = READ_SYNC | REQ_META | REQ_PRIO @@ -1672,21 +1675,23 @@ static int ra_sum_pages(struct f2fs_sb_info *sbi, struct list_head *pages, for (; page_idx start + nrpages; page_idx++) { /* alloc temporal page for read node summary info*/ - page = alloc_page(GFP_F2FS_ZERO); + page = grab_cache_page(mapping, page_idx); if (!page) break; - - lock_page(page); - page-index = page_idx; - list_add_tail(page-lru, pages); + page_cache_release(page); IMO, we don't need to do like this. Instead, for() { page = grab_cache_page(); if (!page) break; page[page_idx] = page; f2fs_submit_page_mbio(sbi, page, fio); } f2fs_submit_merged_bio(sbi, META, READ); return page_idx - start; Afterwards, in restore_node_summry(), lock_page() will wait the end_io for read. ... 
f2fs_put_page(pages[index], 1); Thanks, } - list_for_each_entry(page, pages, lru) - f2fs_submit_page_mbio(sbi, page, page->index, fio); + alloced = page_idx - start; + readed = find_get_pages_contig(mapping, start, alloced, pages); + BUG_ON(alloced != readed); + + for (page_idx = 0; page_idx < readed; page_idx++) + f2fs_submit_page_mbio(sbi, pages[page_idx], + pages[page_idx]->index, fio); f2fs_submit_merged_bio(sbi, META, READ); - return page_idx - start; + return readed; }
[f2fs-dev] [PATCH 1/2 V3] mkfs.f2fs: large volume support
Changes from V2 o remove CP_LARGE_VOL_LFLAG instead, use cp_payload in superblock because disk size is determined at format Changes from V1 o fix orphan node blkaddr -- 8 -- From 7e5e66699bb383e4fa7ce970e1cc8e10eb0a5c6f Mon Sep 17 00:00:00 2001 From: root root@f2fs-00.(none) Date: Mon, 12 May 2014 22:01:38 +0900 Subject: [PATCH 1/2] mkfs.f2fs: large volume support This patch supports large volume over about 3TB. Signed-off-by: Changman Lee cm224@samsung.com --- include/f2fs_fs.h |9 +++ mkfs/f2fs_format.c | 68 +++- 2 files changed, 66 insertions(+), 11 deletions(-) diff --git a/include/f2fs_fs.h b/include/f2fs_fs.h index 94d8dc3..3003f7f 100644 --- a/include/f2fs_fs.h +++ b/include/f2fs_fs.h @@ -223,6 +223,7 @@ enum { #define F2FS_LOG_SECTORS_PER_BLOCK 3 /* 4KB: F2FS_BLKSIZE */ #define F2FS_BLKSIZE 4096/* support only 4KB block */ #define F2FS_MAX_EXTENSION 64 /* # of extension entries */ +#define F2FS_BLK_ALIGN(x) (((x) + F2FS_BLKSIZE - 1) / F2FS_BLKSIZE) #define NULL_ADDR 0x0U #define NEW_ADDR -1U @@ -279,6 +280,7 @@ struct f2fs_super_block { __le16 volume_name[512];/* volume name */ __le32 extension_count; /* # of extensions below */ __u8 extension_list[F2FS_MAX_EXTENSION][8]; /* extension array */ + __le32 cp_payload; } __attribute__((packed)); /* @@ -457,6 +459,13 @@ struct f2fs_nat_block { #define SIT_ENTRY_PER_BLOCK (PAGE_CACHE_SIZE / sizeof(struct f2fs_sit_entry)) /* + * F2FS uses 4 bytes to represent block address. As a result, supported size of + * disk is 16 TB and it equals to 16 * 1024 * 1024 / 2 segments. + */ +#define F2FS_MAX_SEGMENT ((16 * 1024 * 1024) / 2) +#define MAX_SIT_BITMAP_SIZE((F2FS_MAX_SEGMENT / SIT_ENTRY_PER_BLOCK) / 8) + +/* * Note that f2fs_sit_entry-vblocks has the following bit-field information. 
* [15:10] : allocation type such as CURSEG__TYPE * [9:0] : valid block count diff --git a/mkfs/f2fs_format.c b/mkfs/f2fs_format.c index cdbf74a..58550a2 100644 --- a/mkfs/f2fs_format.c +++ b/mkfs/f2fs_format.c @@ -102,7 +102,8 @@ static int f2fs_prepare_super_block(void) u_int32_t blocks_for_sit, blocks_for_nat, blocks_for_ssa; u_int32_t total_valid_blks_available; u_int64_t zone_align_start_offset, diff, total_meta_segments; - u_int32_t sit_bitmap_size, max_nat_bitmap_size, max_nat_segments; + u_int32_t sit_bitmap_size, max_sit_bitmap_size; + u_int32_t max_nat_bitmap_size, max_nat_segments; u_int32_t total_zones; super_block.magic = cpu_to_le32(F2FS_SUPER_MAGIC); @@ -217,8 +218,25 @@ static int f2fs_prepare_super_block(void) */ sit_bitmap_size = ((le32_to_cpu(super_block.segment_count_sit) / 2) log_blks_per_seg) / 8; - max_nat_bitmap_size = CHECKSUM_OFFSET - sizeof(struct f2fs_checkpoint) + 1 - - sit_bitmap_size; + + if (sit_bitmap_size MAX_SIT_BITMAP_SIZE) + max_sit_bitmap_size = MAX_SIT_BITMAP_SIZE; + else + max_sit_bitmap_size = sit_bitmap_size; + + /* +* It should be reserved minimum 1 segment for nat. +* When sit is too large, we should expand cp area. It requires more pages for cp. 
+*/ + if (max_sit_bitmap_size + (CHECKSUM_OFFSET - sizeof(struct f2fs_checkpoint) + 65)) { + max_nat_bitmap_size = CHECKSUM_OFFSET - sizeof(struct f2fs_checkpoint) + 1; + super_block.cp_payload = F2FS_BLK_ALIGN(max_sit_bitmap_size); + } else { + max_nat_bitmap_size = CHECKSUM_OFFSET - sizeof(struct f2fs_checkpoint) + 1 - max_sit_bitmap_size; + super_block.cp_payload = 0; + } + max_nat_segments = (max_nat_bitmap_size * 8) log_blks_per_seg; if (le32_to_cpu(super_block.segment_count_nat) max_nat_segments) @@ -434,6 +452,7 @@ static int f2fs_write_check_point_pack(void) u_int64_t cp_seg_blk_offset = 0; u_int32_t crc = 0; int i; + char *cp_payload = NULL; ckp = calloc(F2FS_BLKSIZE, 1); if (ckp == NULL) { @@ -447,6 +466,12 @@ static int f2fs_write_check_point_pack(void) return -1; } + cp_payload = calloc(F2FS_BLKSIZE, 1); + if (cp_payload == NULL) { + MSG(1, \tError: Calloc Failed for cp_payload!!!\n); + return -1; + } + /* 1. cp page 1 of checkpoint pack 1 */ ckp-checkpoint_ver = cpu_to_le64(1); ckp-cur_node_segno[0] = @@ -485,9 +510,11 @@ static int f2fs_write_check_point_pack(void) ((le32_to_cpu(ckp-free_segment_count) + 6 - le32_to_cpu(ckp-overprov_segment_count)) * config.blks_per_seg)); - ckp-cp_pack_total_block_count
[f2fs-dev] [PATCH 2/2 V3] fsck.f2fs: large volume support
Changes from V2 o remove CP_LARGE_VOL_FLAG instead, use cp_payload in superblock because disk size is determined at format Changes from V1 o fix orphan node blkaddr -- 8 -- From 405367374f868a8cf29bef62c06bf53271b58f52 Mon Sep 17 00:00:00 2001 From: Changman Lee cm224@samsung.com Date: Mon, 12 May 2014 22:03:46 +0900 Subject: [PATCH 2/2] fsck.f2fs: large volume support This patch support large volume over about 3TB. Signed-off-by: Changman Lee cm224@samsung.com --- fsck/f2fs.h | 14 +++--- fsck/fsck.c |7 +-- fsck/mount.c | 22 -- lib/libf2fs.c |4 ++-- 4 files changed, 38 insertions(+), 9 deletions(-) diff --git a/fsck/f2fs.h b/fsck/f2fs.h index e1740fe..427a733 100644 --- a/fsck/f2fs.h +++ b/fsck/f2fs.h @@ -203,9 +203,17 @@ static inline unsigned long __bitmap_size(struct f2fs_sb_info *sbi, int flag) static inline void *__bitmap_ptr(struct f2fs_sb_info *sbi, int flag) { struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi); - int offset = (flag == NAT_BITMAP) ? - le32_to_cpu(ckpt-sit_ver_bitmap_bytesize) : 0; - return ckpt-sit_nat_version_bitmap + offset; + int offset; + if (le32_to_cpu(F2FS_RAW_SUPER(sbi)-cp_payload) 0) { + if (flag == NAT_BITMAP) + return ckpt-sit_nat_version_bitmap; + else + return ((char *)ckpt + F2FS_BLKSIZE); + } else { + offset = (flag == NAT_BITMAP) ? 
+ le32_to_cpu(ckpt-sit_ver_bitmap_bytesize) : 0; + return ckpt-sit_nat_version_bitmap + offset; + } } static inline bool is_set_ckpt_flags(struct f2fs_checkpoint *cp, unsigned int f) diff --git a/fsck/fsck.c b/fsck/fsck.c index 20582c9..a1d5dd0 100644 --- a/fsck/fsck.c +++ b/fsck/fsck.c @@ -653,11 +653,14 @@ int fsck_chk_orphan_node(struct f2fs_sb_info *sbi) block_t start_blk, orphan_blkaddr, i, j; struct f2fs_orphan_block *orphan_blk; + struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi); - if (!is_set_ckpt_flags(F2FS_CKPT(sbi), CP_ORPHAN_PRESENT_FLAG)) + if (!is_set_ckpt_flags(ckpt, CP_ORPHAN_PRESENT_FLAG)) return 0; - start_blk = __start_cp_addr(sbi) + 1; + start_blk = __start_cp_addr(sbi) + 1 + + le32_to_cpu(F2FS_RAW_SUPER(sbi)-cp_payload); + orphan_blkaddr = __start_sum_addr(sbi) - 1; orphan_blk = calloc(BLOCK_SZ, 1); diff --git a/fsck/mount.c b/fsck/mount.c index e2f3ace..24ef3bf 100644 --- a/fsck/mount.c +++ b/fsck/mount.c @@ -129,6 +129,7 @@ void print_raw_sb_info(struct f2fs_sb_info *sbi) DISP_u32(sb, root_ino); DISP_u32(sb, node_ino); DISP_u32(sb, meta_ino); + DISP_u32(sb, cp_payload); printf(\n); } @@ -285,6 +286,7 @@ void *validate_checkpoint(struct f2fs_sb_info *sbi, block_t cp_addr, unsigned lo /* Read the 2nd cp block in this CP pack */ cp_page_2 = malloc(PAGE_SIZE); cp_addr += le32_to_cpu(cp_block-cp_pack_total_block_count) - 1; + if (dev_read_block(cp_page_2, cp_addr) 0) goto invalid_cp2; @@ -295,7 +297,7 @@ void *validate_checkpoint(struct f2fs_sb_info *sbi, block_t cp_addr, unsigned lo crc = *(unsigned int *)((unsigned char *)cp_block + crc_offset); if (f2fs_crc_valid(crc, cp_block, crc_offset)) - goto invalid_cp1; + goto invalid_cp2; cur_version = le64_to_cpu(cp_block-checkpoint_ver); @@ -319,8 +321,9 @@ int get_valid_checkpoint(struct f2fs_sb_info *sbi) unsigned long blk_size = sbi-blocksize; unsigned long long cp1_version = 0, cp2_version = 0; unsigned long long cp_start_blk_no; + unsigned int cp_blks = 1 + 
le32_to_cpu(F2FS_RAW_SUPER(sbi)-cp_payload); - sbi-ckpt = malloc(blk_size); + sbi-ckpt = malloc(cp_blks * blk_size); if (!sbi-ckpt) return -ENOMEM; /* @@ -351,6 +354,20 @@ int get_valid_checkpoint(struct f2fs_sb_info *sbi) memcpy(sbi-ckpt, cur_page, blk_size); + if (cp_blks 1) { + int i; + unsigned long long cp_blk_no; + + cp_blk_no = le32_to_cpu(raw_sb-cp_blkaddr); + if (cur_page == cp2) + cp_blk_no += 1 le32_to_cpu(raw_sb-log_blocks_per_seg); + /* copy sit bitmap */ + for (i = 1; i cp_blks; i++) { + unsigned char *ckpt = (unsigned char *)sbi-ckpt; + dev_read_block(cur_page, cp_blk_no + i); + memcpy(ckpt + i * blk_size, cur_page, blk_size); + } + } free(cp1); free(cp2); return 0; @@ -697,6 +714,7 @@ void check_block_count(struct f2fs_sb_info *sbi, int valid_blocks = 0; int i; + /* check segment usage */ ASSERT(GET_SIT_VBLOCKS(raw_sit) = sbi-blocks_per_seg); diff --git
[f2fs-dev] [PATCH v2] f2fs: avoid crash when trace f2fs_submit_page_mbio event in ra_sum_pages
Previously we allocate pages with no mapping in ra_sum_pages(), so we may encounter a crash in event trace of f2fs_submit_page_mbio where we access mapping data of the page. We'd better allocate pages in bd_inode mapping and invalidate these pages after we restore data from pages. It could avoid crash in above scenario. Changes from V1 o remove redundant code in ra_sum_pages() suggested by Jaegeuk Kim. Call Trace: [f1031630] ? ftrace_raw_event_f2fs_write_checkpoint+0x80/0x80 [f2fs] [f10377bb] f2fs_submit_page_mbio+0x1cb/0x200 [f2fs] [f103c5da] restore_node_summary+0x13a/0x280 [f2fs] [f103e22d] build_curseg+0x2bd/0x620 [f2fs] [f104043b] build_segment_manager+0x1cb/0x920 [f2fs] [f1032c85] f2fs_fill_super+0x535/0x8e0 [f2fs] [c115b66a] mount_bdev+0x16a/0x1a0 [f102f63f] f2fs_mount+0x1f/0x30 [f2fs] [c115c096] mount_fs+0x36/0x170 [c1173635] vfs_kern_mount+0x55/0xe0 [c1175388] do_mount+0x1e8/0x900 [c1175d72] SyS_mount+0x82/0xc0 [c16059cc] sysenter_do_call+0x12/0x22 Suggested-by: Jaegeuk Kim jaegeuk@samsung.com Signed-off-by: Chao Yu chao2...@samsung.com --- fs/f2fs/node.c | 52 1 file changed, 24 insertions(+), 28 deletions(-) diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 3d60d3d..02a59e9 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -1658,35 +1658,29 @@ int recover_inode_page(struct f2fs_sb_info *sbi, struct page *page) /* * ra_sum_pages() merge contiguous pages into one bio and submit. - * these pre-readed pages are linked in pages list. + * these pre-readed pages are alloced in bd_inode's mapping tree. 
*/ -static int ra_sum_pages(struct f2fs_sb_info *sbi, struct list_head *pages, +static int ra_sum_pages(struct f2fs_sb_info *sbi, struct page **pages, int start, int nrpages) { - struct page *page; - int page_idx = start; + struct inode *inode = sbi-sb-s_bdev-bd_inode; + struct address_space *mapping = inode-i_mapping; + int i, page_idx = start; struct f2fs_io_info fio = { .type = META, .rw = READ_SYNC | REQ_META | REQ_PRIO }; - for (; page_idx start + nrpages; page_idx++) { - /* alloc temporal page for read node summary info*/ - page = alloc_page(GFP_F2FS_ZERO); - if (!page) + for (i = 0; page_idx start + nrpages; page_idx++, i++) { + /* alloc page in bd_inode for reading node summary info */ + pages[i] = grab_cache_page(mapping, page_idx); + if (!pages[i]) break; - - lock_page(page); - page-index = page_idx; - list_add_tail(page-lru, pages); + f2fs_submit_page_mbio(sbi, pages[i], page_idx, fio); } - list_for_each_entry(page, pages, lru) - f2fs_submit_page_mbio(sbi, page, page-index, fio); - f2fs_submit_merged_bio(sbi, META, READ); - - return page_idx - start; + return i; } int restore_node_summary(struct f2fs_sb_info *sbi, @@ -1694,11 +1688,11 @@ int restore_node_summary(struct f2fs_sb_info *sbi, { struct f2fs_node *rn; struct f2fs_summary *sum_entry; - struct page *page, *tmp; + struct inode *inode = sbi-sb-s_bdev-bd_inode; block_t addr; int bio_blocks = MAX_BIO_BLOCKS(max_hw_blocks(sbi)); - int i, last_offset, nrpages, err = 0; - LIST_HEAD(page_list); + struct page *pages[bio_blocks]; + int i, idx, last_offset, nrpages, err = 0; /* scan the node segment */ last_offset = sbi-blocks_per_seg; @@ -1709,29 +1703,31 @@ int restore_node_summary(struct f2fs_sb_info *sbi, nrpages = min(last_offset - i, bio_blocks); /* read ahead node pages */ - nrpages = ra_sum_pages(sbi, page_list, addr, nrpages); + nrpages = ra_sum_pages(sbi, pages, addr, nrpages); if (!nrpages) return -ENOMEM; - list_for_each_entry_safe(page, tmp, page_list, lru) { + for (idx = 0; idx nrpages; idx++) 
{ if (err) goto skip; - lock_page(page); - if (unlikely(!PageUptodate(page))) { + lock_page(pages[idx]); + if (unlikely(!PageUptodate(pages[idx]))) { err = -EIO; } else { - rn = F2FS_NODE(page); + rn = F2FS_NODE(pages[idx]); sum_entry-nid = rn-footer.nid; sum_entry-version = 0; sum_entry-ofs_in_node = 0; sum_entry++; } - unlock_page(page); +
Re: [f2fs-dev] [PATCH v2] f2fs: avoid crash when trace f2fs_submit_page_mbio event in ra_sum_pages
Hi, Chao Could you think about following once. move node_inode in front of build_segment_manager, then use node_inode instead of bd_inode. On Tue, May 27, 2014 at 08:41:07AM +0800, Chao Yu wrote: Previously we allocate pages with no mapping in ra_sum_pages(), so we may encounter a crash in event trace of f2fs_submit_page_mbio where we access mapping data of the page. We'd better allocate pages in bd_inode mapping and invalidate these pages after we restore data from pages. It could avoid crash in above scenario. Changes from V1 o remove redundant code in ra_sum_pages() suggested by Jaegeuk Kim. Call Trace: [f1031630] ? ftrace_raw_event_f2fs_write_checkpoint+0x80/0x80 [f2fs] [f10377bb] f2fs_submit_page_mbio+0x1cb/0x200 [f2fs] [f103c5da] restore_node_summary+0x13a/0x280 [f2fs] [f103e22d] build_curseg+0x2bd/0x620 [f2fs] [f104043b] build_segment_manager+0x1cb/0x920 [f2fs] [f1032c85] f2fs_fill_super+0x535/0x8e0 [f2fs] [c115b66a] mount_bdev+0x16a/0x1a0 [f102f63f] f2fs_mount+0x1f/0x30 [f2fs] [c115c096] mount_fs+0x36/0x170 [c1173635] vfs_kern_mount+0x55/0xe0 [c1175388] do_mount+0x1e8/0x900 [c1175d72] SyS_mount+0x82/0xc0 [c16059cc] sysenter_do_call+0x12/0x22 Suggested-by: Jaegeuk Kim jaegeuk@samsung.com Signed-off-by: Chao Yu chao2...@samsung.com --- fs/f2fs/node.c | 52 1 file changed, 24 insertions(+), 28 deletions(-) diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 3d60d3d..02a59e9 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -1658,35 +1658,29 @@ int recover_inode_page(struct f2fs_sb_info *sbi, struct page *page) /* * ra_sum_pages() merge contiguous pages into one bio and submit. - * these pre-readed pages are linked in pages list. + * these pre-readed pages are alloced in bd_inode's mapping tree. 
*/ -static int ra_sum_pages(struct f2fs_sb_info *sbi, struct list_head *pages, +static int ra_sum_pages(struct f2fs_sb_info *sbi, struct page **pages, int start, int nrpages) { - struct page *page; - int page_idx = start; + struct inode *inode = sbi-sb-s_bdev-bd_inode; + struct address_space *mapping = inode-i_mapping; + int i, page_idx = start; struct f2fs_io_info fio = { .type = META, .rw = READ_SYNC | REQ_META | REQ_PRIO }; - for (; page_idx start + nrpages; page_idx++) { - /* alloc temporal page for read node summary info*/ - page = alloc_page(GFP_F2FS_ZERO); - if (!page) + for (i = 0; page_idx start + nrpages; page_idx++, i++) { + /* alloc page in bd_inode for reading node summary info */ + pages[i] = grab_cache_page(mapping, page_idx); + if (!pages[i]) break; - - lock_page(page); - page-index = page_idx; - list_add_tail(page-lru, pages); + f2fs_submit_page_mbio(sbi, pages[i], page_idx, fio); } - list_for_each_entry(page, pages, lru) - f2fs_submit_page_mbio(sbi, page, page-index, fio); - f2fs_submit_merged_bio(sbi, META, READ); - - return page_idx - start; + return i; } int restore_node_summary(struct f2fs_sb_info *sbi, @@ -1694,11 +1688,11 @@ int restore_node_summary(struct f2fs_sb_info *sbi, { struct f2fs_node *rn; struct f2fs_summary *sum_entry; - struct page *page, *tmp; + struct inode *inode = sbi-sb-s_bdev-bd_inode; block_t addr; int bio_blocks = MAX_BIO_BLOCKS(max_hw_blocks(sbi)); - int i, last_offset, nrpages, err = 0; - LIST_HEAD(page_list); + struct page *pages[bio_blocks]; + int i, idx, last_offset, nrpages, err = 0; /* scan the node segment */ last_offset = sbi-blocks_per_seg; @@ -1709,29 +1703,31 @@ int restore_node_summary(struct f2fs_sb_info *sbi, nrpages = min(last_offset - i, bio_blocks); /* read ahead node pages */ - nrpages = ra_sum_pages(sbi, page_list, addr, nrpages); + nrpages = ra_sum_pages(sbi, pages, addr, nrpages); if (!nrpages) return -ENOMEM; - list_for_each_entry_safe(page, tmp, page_list, lru) { + for (idx = 0; idx nrpages; idx++) 
{ if (err) goto skip; - lock_page(page); - if (unlikely(!PageUptodate(page))) { + lock_page(pages[idx]); + if (unlikely(!PageUptodate(pages[idx]))) { err = -EIO; } else { - rn = F2FS_NODE(page); + rn = F2FS_NODE(pages[idx]); sum_entry-nid = rn-footer.nid; sum_entry-version = 0;