Re: [f2fs-dev] [PATCH] f2fs: avoid crash when trace f2fs_submit_page_mbio event in ra_sum_pages

2014-05-26 Thread Chao Yu
Hi Kim,

 -Original Message-
 From: Jaegeuk Kim [mailto:jaeg...@kernel.org]
 Sent: Wednesday, May 21, 2014 11:37 AM
 To: Chao Yu
 Cc: Jaegeuk Kim; linux-fsde...@vger.kernel.org; linux-ker...@vger.kernel.org;
 linux-f2fs-devel@lists.sourceforge.net
 Subject: Re: [f2fs-dev] [PATCH] f2fs: avoid crash when trace 
 f2fs_submit_page_mbio event in
 ra_sum_pages
 
 Hi Chao,
 
  2014-05-16 (Fri), 17:14 +0800, Chao Yu:
   Previously we allocated pages with no mapping in ra_sum_pages(), so we could
   crash in the f2fs_submit_page_mbio trace event, which accesses the page's
   mapping data.
  
   We'd better allocate the pages in bd_inode's mapping and invalidate them after
   we restore data from them. That avoids the crash in the above scenario.
 
  Call Trace:
   [f1031630] ? ftrace_raw_event_f2fs_write_checkpoint+0x80/0x80 [f2fs]
   [f10377bb] f2fs_submit_page_mbio+0x1cb/0x200 [f2fs]
   [f103c5da] restore_node_summary+0x13a/0x280 [f2fs]
   [f103e22d] build_curseg+0x2bd/0x620 [f2fs]
   [f104043b] build_segment_manager+0x1cb/0x920 [f2fs]
   [f1032c85] f2fs_fill_super+0x535/0x8e0 [f2fs]
   [c115b66a] mount_bdev+0x16a/0x1a0
   [f102f63f] f2fs_mount+0x1f/0x30 [f2fs]
   [c115c096] mount_fs+0x36/0x170
   [c1173635] vfs_kern_mount+0x55/0xe0
   [c1175388] do_mount+0x1e8/0x900
   [c1175d72] SyS_mount+0x82/0xc0
   [c16059cc] sysenter_do_call+0x12/0x22
 
  Signed-off-by: Chao Yu chao2...@samsung.com
  ---
   fs/f2fs/node.c |   49 -
   1 file changed, 28 insertions(+), 21 deletions(-)
 
  diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
  index 3d60d3d..b5cd814 100644
  --- a/fs/f2fs/node.c
  +++ b/fs/f2fs/node.c
  @@ -1658,13 +1658,16 @@ int recover_inode_page(struct f2fs_sb_info *sbi, 
  struct page *page)
 
   /*
* ra_sum_pages() merge contiguous pages into one bio and submit.
  - * these pre-readed pages are linked in pages list.
  + * these pre-readed pages are alloced in bd_inode's mapping tree.
*/
  -static int ra_sum_pages(struct f2fs_sb_info *sbi, struct list_head *pages,
  +static int ra_sum_pages(struct f2fs_sb_info *sbi, struct page **pages,
  int start, int nrpages)
   {
  struct page *page;
   +   struct inode *inode = sbi->sb->s_bdev->bd_inode;
   +   struct address_space *mapping = inode->i_mapping;
  int page_idx = start;
  +   int alloced, readed;
  struct f2fs_io_info fio = {
  .type = META,
  .rw = READ_SYNC | REQ_META | REQ_PRIO
  @@ -1672,21 +1675,23 @@ static int ra_sum_pages(struct f2fs_sb_info *sbi, 
  struct list_head
 *pages,
 
   for (; page_idx < start + nrpages; page_idx++) {
  /* alloc temporal page for read node summary info*/
  -   page = alloc_page(GFP_F2FS_ZERO);
  +   page = grab_cache_page(mapping, page_idx);
  if (!page)
  break;
  -
  -   lock_page(page);
   -   page->index = page_idx;
   -   list_add_tail(&page->lru, pages);
  +   page_cache_release(page);
 
 IMO, we don't need to do like this.
 Instead,
   for() {
   page = grab_cache_page();
   if (!page)
   break;
    pages[page_idx] = page;
    f2fs_submit_page_mbio(sbi, page, &fio);
   }
   f2fs_submit_merged_bio(sbi, META, READ);
   return page_idx - start;

Agreed, it helps to remove a lot of redundant code here.
Thanks!

 
  Afterwards, in restore_node_summary(),
    lock_page() will wait for the read end_io.
   ...
   f2fs_put_page(pages[index], 1);
 
 Thanks,
 
  }
 
  -   list_for_each_entry(page, pages, lru)
   -   f2fs_submit_page_mbio(sbi, page, page->index, &fio);
  +   alloced = page_idx - start;
  +   readed = find_get_pages_contig(mapping, start, alloced, pages);
  +   BUG_ON(alloced != readed);
  +
   +   for (page_idx = 0; page_idx < readed; page_idx++)
   +   f2fs_submit_page_mbio(sbi, pages[page_idx],
   +   pages[page_idx]->index, &fio);
 
  f2fs_submit_merged_bio(sbi, META, READ);
 
  -   return page_idx - start;
  +   return readed;
   }
 
   int restore_node_summary(struct f2fs_sb_info *sbi,
  @@ -1694,11 +1699,11 @@ int restore_node_summary(struct f2fs_sb_info *sbi,
   {
  struct f2fs_node *rn;
  struct f2fs_summary *sum_entry;
  -   struct page *page, *tmp;
   +   struct inode *inode = sbi->sb->s_bdev->bd_inode;
  block_t addr;
  int bio_blocks = MAX_BIO_BLOCKS(max_hw_blocks(sbi));
  -   int i, last_offset, nrpages, err = 0;
  -   LIST_HEAD(page_list);
  +   struct page *pages[bio_blocks];
  +   int i, index, last_offset, nrpages, err = 0;
 
  /* scan the node segment */
   last_offset = sbi->blocks_per_seg;
  @@ -1709,29 +1714,31 @@ int restore_node_summary(struct f2fs_sb_info *sbi,
  nrpages = min(last_offset - i, bio_blocks);
 
  /* read ahead node pages */
  -   nrpages = ra_sum_pages(sbi, page_list, addr, nrpages);
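
 Putting the suggestion above together, the simplified ra_sum_pages() loop
 would look roughly like this (a sketch only; helper names and the fio setup
 are taken from the patch quoted in this thread):

	for (i = 0; page_idx < start + nrpages; page_idx++, i++) {
		/* page comes from bd_inode's mapping, so page->mapping is valid */
		pages[i] = grab_cache_page(mapping, page_idx);
		if (!pages[i])
			break;
		f2fs_submit_page_mbio(sbi, pages[i], page_idx, &fio);
	}
	f2fs_submit_merged_bio(sbi, META, READ);
	return i;

 and the consumer loop in restore_node_summary() would then be roughly:

	for (idx = 0; idx < nrpages; idx++) {
		lock_page(pages[idx]);		/* waits for the read end_io */
		if (unlikely(!PageUptodate(pages[idx]))) {
			err = -EIO;
		} else {
			rn = F2FS_NODE(pages[idx]);
			sum_entry->nid = rn->footer.nid;
			sum_entry->version = 0;
			sum_entry->ofs_in_node = 0;
			sum_entry++;
		}
		f2fs_put_page(pages[idx], 1);	/* unlock and drop the reference */
	}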
  

Re: [f2fs-dev] [PATCH] f2fs: avoid crash when trace f2fs_submit_page_mbio event in ra_sum_pages

2014-05-26 Thread Changman Lee
On Mon, May 26, 2014 at 02:26:24PM +0800, Chao Yu wrote:
 Hi Changman,
 
  -Original Message-
  From: Changman Lee [mailto:cm224@samsung.com]
  Sent: Friday, May 23, 2014 1:14 PM
  To: Jaegeuk Kim
  Cc: Chao Yu; linux-fsde...@vger.kernel.org; linux-ker...@vger.kernel.org;
  linux-f2fs-devel@lists.sourceforge.net
  Subject: Re: [f2fs-dev] [PATCH] f2fs: avoid crash when trace 
  f2fs_submit_page_mbio event in
  ra_sum_pages
  
  On Wed, May 21, 2014 at 12:36:46PM +0900, Jaegeuk Kim wrote:
   Hi Chao,
  
    2014-05-16 (Fri), 17:14 +0800, Chao Yu:
 Previously we allocated pages with no mapping in ra_sum_pages(), so we could
 crash in the f2fs_submit_page_mbio trace event, which accesses the page's
 mapping data.
    
 We'd better allocate the pages in bd_inode's mapping and invalidate them after
 we restore data from them. That avoids the crash in the above scenario.
   
Call Trace:
 [f1031630] ? ftrace_raw_event_f2fs_write_checkpoint+0x80/0x80 [f2fs]
 [f10377bb] f2fs_submit_page_mbio+0x1cb/0x200 [f2fs]
 [f103c5da] restore_node_summary+0x13a/0x280 [f2fs]
 [f103e22d] build_curseg+0x2bd/0x620 [f2fs]
 [f104043b] build_segment_manager+0x1cb/0x920 [f2fs]
 [f1032c85] f2fs_fill_super+0x535/0x8e0 [f2fs]
 [c115b66a] mount_bdev+0x16a/0x1a0
 [f102f63f] f2fs_mount+0x1f/0x30 [f2fs]
 [c115c096] mount_fs+0x36/0x170
 [c1173635] vfs_kern_mount+0x55/0xe0
 [c1175388] do_mount+0x1e8/0x900
 [c1175d72] SyS_mount+0x82/0xc0
 [c16059cc] sysenter_do_call+0x12/0x22
   
Signed-off-by: Chao Yu chao2...@samsung.com
---
 fs/f2fs/node.c |   49 -
 1 file changed, 28 insertions(+), 21 deletions(-)
   
diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
index 3d60d3d..b5cd814 100644
--- a/fs/f2fs/node.c
+++ b/fs/f2fs/node.c
@@ -1658,13 +1658,16 @@ int recover_inode_page(struct f2fs_sb_info 
*sbi, struct page *page)
   
 /*
  * ra_sum_pages() merge contiguous pages into one bio and submit.
- * these pre-readed pages are linked in pages list.
+ * these pre-readed pages are alloced in bd_inode's mapping tree.
  */
-static int ra_sum_pages(struct f2fs_sb_info *sbi, struct list_head 
*pages,
+static int ra_sum_pages(struct f2fs_sb_info *sbi, struct page **pages,
int start, int nrpages)
 {
struct page *page;
 +   struct inode *inode = sbi->sb->s_bdev->bd_inode;
  
   How about using sbi->meta_inode instead of bd_inode? Then we could
   cache the summary pages for further I/O.
 
  In my understanding, in ra_sum_pages() we read ahead node pages in the NODE
  segment, and then fill the current summary cache with the nid from each node
  page's footer. So we should not cache these read-ahead pages in meta_inode's
  mapping. Did I miss something?
 
 Regards
 

Sorry, you're right. Forget about caching. I've confused ra_sum_pages with 
summary segments.

  
 +   struct address_space *mapping = inode->i_mapping;
int page_idx = start;
+   int alloced, readed;
struct f2fs_io_info fio = {
.type = META,
.rw = READ_SYNC | REQ_META | REQ_PRIO
@@ -1672,21 +1675,23 @@ static int ra_sum_pages(struct f2fs_sb_info 
*sbi, struct list_head
  *pages,
   
 for (; page_idx < start + nrpages; page_idx++) {
/* alloc temporal page for read node summary info*/
-   page = alloc_page(GFP_F2FS_ZERO);
+   page = grab_cache_page(mapping, page_idx);
if (!page)
break;
-
-   lock_page(page);
 -   page->index = page_idx;
 -   list_add_tail(&page->lru, pages);
+   page_cache_release(page);
  
   IMO, we don't need to do like this.
   Instead,
 for() {
 page = grab_cache_page();
 if (!page)
 break;
  pages[page_idx] = page;
  f2fs_submit_page_mbio(sbi, page, &fio);
 }
 f2fs_submit_merged_bio(sbi, META, READ);
 return page_idx - start;
  
    Afterwards, in restore_node_summary(),
  lock_page() will wait for the read end_io.
 ...
 f2fs_put_page(pages[index], 1);
  
   Thanks,
  
}
   
-   list_for_each_entry(page, pages, lru)
 -   f2fs_submit_page_mbio(sbi, page, page->index, &fio);
+   alloced = page_idx - start;
+   readed = find_get_pages_contig(mapping, start, alloced, pages);
+   BUG_ON(alloced != readed);
+
 +   for (page_idx = 0; page_idx < readed; page_idx++)
 +   f2fs_submit_page_mbio(sbi, pages[page_idx],
 +   pages[page_idx]->index, &fio);
   
f2fs_submit_merged_bio(sbi, META, READ);
   
-   return page_idx - start;
+   return readed;
 }
   

[f2fs-dev] [PATCH 1/2 V3] mkfs.f2fs: large volume support

2014-05-26 Thread Changman Lee

Changes from V2
 o remove CP_LARGE_VOL_FLAG; instead, use cp_payload in the superblock,
 because the disk size is determined at format time

Changes from V1
 o fix orphan node blkaddr

-- >8 --

From 7e5e66699bb383e4fa7ce970e1cc8e10eb0a5c6f Mon Sep 17 00:00:00 2001
From: root root@f2fs-00.(none)
Date: Mon, 12 May 2014 22:01:38 +0900
Subject: [PATCH 1/2] mkfs.f2fs: large volume support

This patch supports large volumes over about 3 TB.

Signed-off-by: Changman Lee cm224@samsung.com
---
 include/f2fs_fs.h  |9 +++
 mkfs/f2fs_format.c |   68 +++-
 2 files changed, 66 insertions(+), 11 deletions(-)

diff --git a/include/f2fs_fs.h b/include/f2fs_fs.h
index 94d8dc3..3003f7f 100644
--- a/include/f2fs_fs.h
+++ b/include/f2fs_fs.h
@@ -223,6 +223,7 @@ enum {
 #define F2FS_LOG_SECTORS_PER_BLOCK 3   /* 4KB: F2FS_BLKSIZE */
 #define F2FS_BLKSIZE   4096/* support only 4KB block */
 #define F2FS_MAX_EXTENSION 64  /* # of extension entries */
+#define F2FS_BLK_ALIGN(x)  (((x) + F2FS_BLKSIZE - 1) / F2FS_BLKSIZE)
 
 #define NULL_ADDR  0x0U
 #define NEW_ADDR   -1U
@@ -279,6 +280,7 @@ struct f2fs_super_block {
__le16 volume_name[512];/* volume name */
__le32 extension_count; /* # of extensions below */
__u8 extension_list[F2FS_MAX_EXTENSION][8]; /* extension array */
+   __le32 cp_payload;
 } __attribute__((packed));
 
 /*
@@ -457,6 +459,13 @@ struct f2fs_nat_block {
 #define SIT_ENTRY_PER_BLOCK (PAGE_CACHE_SIZE / sizeof(struct f2fs_sit_entry))
 
 /*
+ * F2FS uses 4 bytes to represent block address. As a result, supported size of
+ * disk is 16 TB and it equals to 16 * 1024 * 1024 / 2 segments.
+ */
+#define F2FS_MAX_SEGMENT   ((16 * 1024 * 1024) / 2)
+#define MAX_SIT_BITMAP_SIZE ((F2FS_MAX_SEGMENT / SIT_ENTRY_PER_BLOCK) / 8)
+
+/*
  * Note that f2fs_sit_entry->vblocks has the following bit-field information.
  * [15:10] : allocation type such as CURSEG__TYPE
  * [9:0] : valid block count
diff --git a/mkfs/f2fs_format.c b/mkfs/f2fs_format.c
index cdbf74a..58550a2 100644
--- a/mkfs/f2fs_format.c
+++ b/mkfs/f2fs_format.c
@@ -102,7 +102,8 @@ static int f2fs_prepare_super_block(void)
u_int32_t blocks_for_sit, blocks_for_nat, blocks_for_ssa;
u_int32_t total_valid_blks_available;
u_int64_t zone_align_start_offset, diff, total_meta_segments;
-   u_int32_t sit_bitmap_size, max_nat_bitmap_size, max_nat_segments;
+   u_int32_t sit_bitmap_size, max_sit_bitmap_size;
+   u_int32_t max_nat_bitmap_size, max_nat_segments;
u_int32_t total_zones;
 
super_block.magic = cpu_to_le32(F2FS_SUPER_MAGIC);
@@ -217,8 +218,25 @@ static int f2fs_prepare_super_block(void)
 */
	sit_bitmap_size = ((le32_to_cpu(super_block.segment_count_sit) / 2)
			<< log_blks_per_seg) / 8;
-   max_nat_bitmap_size = CHECKSUM_OFFSET - sizeof(struct f2fs_checkpoint) 
+ 1 -
-   sit_bitmap_size;
+
+   if (sit_bitmap_size > MAX_SIT_BITMAP_SIZE)
+   max_sit_bitmap_size = MAX_SIT_BITMAP_SIZE;
+   else
+   max_sit_bitmap_size = sit_bitmap_size;
+
+   /*
+    * It should be reserved minimum 1 segment for nat.
+    * When sit is too large, we should expand cp area. It requires more
+    * pages for cp.
+    */
+   if (max_sit_bitmap_size >
+   (CHECKSUM_OFFSET - sizeof(struct f2fs_checkpoint) + 65)) {
+   max_nat_bitmap_size = CHECKSUM_OFFSET -
+   sizeof(struct f2fs_checkpoint) + 1;
+   super_block.cp_payload = F2FS_BLK_ALIGN(max_sit_bitmap_size);
+   } else {
+   max_nat_bitmap_size = CHECKSUM_OFFSET -
+   sizeof(struct f2fs_checkpoint) + 1 - max_sit_bitmap_size;
+   super_block.cp_payload = 0;
+   }
+
	max_nat_segments = (max_nat_bitmap_size * 8) >> log_blks_per_seg;
 
	if (le32_to_cpu(super_block.segment_count_nat) > max_nat_segments)
@@ -434,6 +452,7 @@ static int f2fs_write_check_point_pack(void)
u_int64_t cp_seg_blk_offset = 0;
u_int32_t crc = 0;
int i;
+   char *cp_payload = NULL;
 
ckp = calloc(F2FS_BLKSIZE, 1);
if (ckp == NULL) {
@@ -447,6 +466,12 @@ static int f2fs_write_check_point_pack(void)
return -1;
}
 
+   cp_payload = calloc(F2FS_BLKSIZE, 1);
+   if (cp_payload == NULL) {
+   MSG(1, "\tError: Calloc Failed for cp_payload!!!\n");
+   return -1;
+   }
+
/* 1. cp page 1 of checkpoint pack 1 */
	ckp->checkpoint_ver = cpu_to_le64(1);
	ckp->cur_node_segno[0] =
@@ -485,9 +510,11 @@ static int f2fs_write_check_point_pack(void)
	((le32_to_cpu(ckp->free_segment_count) + 6 -
	le32_to_cpu(ckp->overprov_segment_count)) *
	 config.blks_per_seg));
-   ckp->cp_pack_total_block_count 
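
For reference, the 16 TB / F2FS_MAX_SEGMENT figures in the patch can be checked
with a small userspace program. It assumes 4 KB blocks and 512 blocks per
segment (2 MB segments); these are the mainline f2fs defaults and are not
spelled out in the patch itself:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	const uint64_t blksize = 4096;           /* F2FS_BLKSIZE */
	const uint64_t blks_per_seg = 512;       /* 2 MB segments (assumed default) */
	const uint64_t max_blocks = 1ULL << 32;  /* 32-bit block address */

	uint64_t max_bytes = max_blocks * blksize;          /* 16 TB */
	uint64_t max_segments = max_blocks / blks_per_seg;  /* == 16*1024*1024/2 */

	printf("max volume size: %llu TB\n",
	       (unsigned long long)(max_bytes >> 40));
	printf("max segments: %llu (matches F2FS_MAX_SEGMENT: %s)\n",
	       (unsigned long long)max_segments,
	       max_segments == (16ULL * 1024 * 1024) / 2 ? "yes" : "no");
	return 0;
}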

[f2fs-dev] [PATCH 2/2 V3] fsck.f2fs: large volume support

2014-05-26 Thread Changman Lee
Changes from V2
 o remove CP_LARGE_VOL_FLAG; instead, use cp_payload in the superblock,
  because the disk size is determined at format time

Changes from V1
 o fix orphan node blkaddr

-- >8 --

From 405367374f868a8cf29bef62c06bf53271b58f52 Mon Sep 17 00:00:00 2001
From: Changman Lee cm224@samsung.com
Date: Mon, 12 May 2014 22:03:46 +0900
Subject: [PATCH 2/2] fsck.f2fs: large volume support

This patch supports large volumes over about 3 TB.

Signed-off-by: Changman Lee cm224@samsung.com
---
 fsck/f2fs.h   |   14 +++---
 fsck/fsck.c   |7 +--
 fsck/mount.c  |   22 --
 lib/libf2fs.c |4 ++--
 4 files changed, 38 insertions(+), 9 deletions(-)

diff --git a/fsck/f2fs.h b/fsck/f2fs.h
index e1740fe..427a733 100644
--- a/fsck/f2fs.h
+++ b/fsck/f2fs.h
@@ -203,9 +203,17 @@ static inline unsigned long __bitmap_size(struct 
f2fs_sb_info *sbi, int flag)
 static inline void *__bitmap_ptr(struct f2fs_sb_info *sbi, int flag)
 {
struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
-   int offset = (flag == NAT_BITMAP) ?
-   le32_to_cpu(ckpt->sit_ver_bitmap_bytesize) : 0;
-   return ckpt->sit_nat_version_bitmap + offset;
+   int offset;
+   if (le32_to_cpu(F2FS_RAW_SUPER(sbi)->cp_payload) > 0) {
+   if (flag == NAT_BITMAP)
+   return ckpt->sit_nat_version_bitmap;
+   else
+   return ((char *)ckpt + F2FS_BLKSIZE);
+   } else {
+   offset = (flag == NAT_BITMAP) ?
+   le32_to_cpu(ckpt->sit_ver_bitmap_bytesize) : 0;
+   return ckpt->sit_nat_version_bitmap + offset;
+   }
 }
 
 static inline bool is_set_ckpt_flags(struct f2fs_checkpoint *cp, unsigned int 
f)
diff --git a/fsck/fsck.c b/fsck/fsck.c
index 20582c9..a1d5dd0 100644
--- a/fsck/fsck.c
+++ b/fsck/fsck.c
@@ -653,11 +653,14 @@ int fsck_chk_orphan_node(struct f2fs_sb_info *sbi)
 
block_t start_blk, orphan_blkaddr, i, j;
struct f2fs_orphan_block *orphan_blk;
+   struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
 
-   if (!is_set_ckpt_flags(F2FS_CKPT(sbi), CP_ORPHAN_PRESENT_FLAG))
+   if (!is_set_ckpt_flags(ckpt, CP_ORPHAN_PRESENT_FLAG))
return 0;
 
-   start_blk = __start_cp_addr(sbi) + 1;
+   start_blk = __start_cp_addr(sbi) + 1 +
+   le32_to_cpu(F2FS_RAW_SUPER(sbi)->cp_payload);
+
orphan_blkaddr = __start_sum_addr(sbi) - 1;
 
orphan_blk = calloc(BLOCK_SZ, 1);
diff --git a/fsck/mount.c b/fsck/mount.c
index e2f3ace..24ef3bf 100644
--- a/fsck/mount.c
+++ b/fsck/mount.c
@@ -129,6 +129,7 @@ void print_raw_sb_info(struct f2fs_sb_info *sbi)
DISP_u32(sb, root_ino);
DISP_u32(sb, node_ino);
DISP_u32(sb, meta_ino);
+   DISP_u32(sb, cp_payload);
printf(\n);
 }
 
@@ -285,6 +286,7 @@ void *validate_checkpoint(struct f2fs_sb_info *sbi, block_t 
cp_addr, unsigned lo
/* Read the 2nd cp block in this CP pack */
cp_page_2 = malloc(PAGE_SIZE);
	cp_addr += le32_to_cpu(cp_block->cp_pack_total_block_count) - 1;
+
	if (dev_read_block(cp_page_2, cp_addr) < 0)
goto invalid_cp2;
 
@@ -295,7 +297,7 @@ void *validate_checkpoint(struct f2fs_sb_info *sbi, block_t 
cp_addr, unsigned lo
 
crc = *(unsigned int *)((unsigned char *)cp_block + crc_offset);
if (f2fs_crc_valid(crc, cp_block, crc_offset))
-   goto invalid_cp1;
+   goto invalid_cp2;
 
	cur_version = le64_to_cpu(cp_block->checkpoint_ver);
 
@@ -319,8 +321,9 @@ int get_valid_checkpoint(struct f2fs_sb_info *sbi)
	unsigned long blk_size = sbi->blocksize;
unsigned long long cp1_version = 0, cp2_version = 0;
unsigned long long cp_start_blk_no;
+   unsigned int cp_blks = 1 + le32_to_cpu(F2FS_RAW_SUPER(sbi)->cp_payload);
 
-   sbi->ckpt = malloc(blk_size);
+   sbi->ckpt = malloc(cp_blks * blk_size);
	if (!sbi->ckpt)
return -ENOMEM;
/*
@@ -351,6 +354,20 @@ int get_valid_checkpoint(struct f2fs_sb_info *sbi)
 
	memcpy(sbi->ckpt, cur_page, blk_size);
 
+   if (cp_blks > 1) {
+   int i;
+   unsigned long long cp_blk_no;
+
+   cp_blk_no = le32_to_cpu(raw_sb->cp_blkaddr);
+   if (cur_page == cp2)
+   cp_blk_no += 1 <<
+   le32_to_cpu(raw_sb->log_blocks_per_seg);
+   /* copy sit bitmap */
+   for (i = 1; i  cp_blks; i++) {
+   unsigned char *ckpt = (unsigned char *)sbi->ckpt;
+   dev_read_block(cur_page, cp_blk_no + i);
+   memcpy(ckpt + i * blk_size, cur_page, blk_size);
+   }
+   }
free(cp1);
free(cp2);
return 0;
@@ -697,6 +714,7 @@ void check_block_count(struct f2fs_sb_info *sbi,
int valid_blocks = 0;
int i;
 
+
/* check segment usage */
	ASSERT(GET_SIT_VBLOCKS(raw_sit) <= sbi->blocks_per_seg);
 
diff --git 
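
Read together, the hunks above imply the following checkpoint-pack layout when
cp_payload > 0 (block positions relative to __start_cp_addr(); this is an
inferred summary of the patch, not text from it):

	cp block 0                : f2fs_checkpoint, with the NAT version bitmap
	                            kept in sit_nat_version_bitmap
	cp blocks 1 .. cp_payload : SIT version bitmap spill-over
	                            (returned as (char *)ckpt + F2FS_BLKSIZE)
	following blocks          : orphan blocks, hence
	                            start_blk = __start_cp_addr(sbi) + 1 + cp_payload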

[f2fs-dev] [PATCH v2] f2fs: avoid crash when trace f2fs_submit_page_mbio event in ra_sum_pages

2014-05-26 Thread Chao Yu
Previously we allocated pages with no mapping in ra_sum_pages(), so we could
crash in the f2fs_submit_page_mbio trace event, which accesses the page's
mapping data.

We'd better allocate the pages in bd_inode's mapping and invalidate them after
we restore data from them. That avoids the crash in the above scenario.

Changes from V1
 o remove redundant code in ra_sum_pages() suggested by Jaegeuk Kim.

Call Trace:
 [f1031630] ? ftrace_raw_event_f2fs_write_checkpoint+0x80/0x80 [f2fs]
 [f10377bb] f2fs_submit_page_mbio+0x1cb/0x200 [f2fs]
 [f103c5da] restore_node_summary+0x13a/0x280 [f2fs]
 [f103e22d] build_curseg+0x2bd/0x620 [f2fs]
 [f104043b] build_segment_manager+0x1cb/0x920 [f2fs]
 [f1032c85] f2fs_fill_super+0x535/0x8e0 [f2fs]
 [c115b66a] mount_bdev+0x16a/0x1a0
 [f102f63f] f2fs_mount+0x1f/0x30 [f2fs]
 [c115c096] mount_fs+0x36/0x170
 [c1173635] vfs_kern_mount+0x55/0xe0
 [c1175388] do_mount+0x1e8/0x900
 [c1175d72] SyS_mount+0x82/0xc0
 [c16059cc] sysenter_do_call+0x12/0x22

Suggested-by: Jaegeuk Kim jaegeuk@samsung.com
Signed-off-by: Chao Yu chao2...@samsung.com
---
 fs/f2fs/node.c |   52 
 1 file changed, 24 insertions(+), 28 deletions(-)

diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
index 3d60d3d..02a59e9 100644
--- a/fs/f2fs/node.c
+++ b/fs/f2fs/node.c
@@ -1658,35 +1658,29 @@ int recover_inode_page(struct f2fs_sb_info *sbi, struct 
page *page)
 
 /*
  * ra_sum_pages() merge contiguous pages into one bio and submit.
- * these pre-readed pages are linked in pages list.
+ * these pre-readed pages are alloced in bd_inode's mapping tree.
  */
-static int ra_sum_pages(struct f2fs_sb_info *sbi, struct list_head *pages,
+static int ra_sum_pages(struct f2fs_sb_info *sbi, struct page **pages,
int start, int nrpages)
 {
-   struct page *page;
-   int page_idx = start;
+   struct inode *inode = sbi->sb->s_bdev->bd_inode;
+   struct address_space *mapping = inode->i_mapping;
+   int i, page_idx = start;
struct f2fs_io_info fio = {
.type = META,
.rw = READ_SYNC | REQ_META | REQ_PRIO
};
 
-   for (; page_idx < start + nrpages; page_idx++) {
-   /* alloc temporal page for read node summary info*/
-   page = alloc_page(GFP_F2FS_ZERO);
-   if (!page)
+   for (i = 0; page_idx < start + nrpages; page_idx++, i++) {
+   /* alloc page in bd_inode for reading node summary info */
+   pages[i] = grab_cache_page(mapping, page_idx);
+   if (!pages[i])
break;
-
-   lock_page(page);
-   page->index = page_idx;
-   list_add_tail(&page->lru, pages);
+   f2fs_submit_page_mbio(sbi, pages[i], page_idx, &fio);
}
 
-   list_for_each_entry(page, pages, lru)
-   f2fs_submit_page_mbio(sbi, page, page->index, &fio);
-
f2fs_submit_merged_bio(sbi, META, READ);
-
-   return page_idx - start;
+   return i;
 }
 
 int restore_node_summary(struct f2fs_sb_info *sbi,
@@ -1694,11 +1688,11 @@ int restore_node_summary(struct f2fs_sb_info *sbi,
 {
struct f2fs_node *rn;
struct f2fs_summary *sum_entry;
-   struct page *page, *tmp;
+   struct inode *inode = sbi->sb->s_bdev->bd_inode;
block_t addr;
int bio_blocks = MAX_BIO_BLOCKS(max_hw_blocks(sbi));
-   int i, last_offset, nrpages, err = 0;
-   LIST_HEAD(page_list);
+   struct page *pages[bio_blocks];
+   int i, idx, last_offset, nrpages, err = 0;
 
/* scan the node segment */
	last_offset = sbi->blocks_per_seg;
@@ -1709,29 +1703,31 @@ int restore_node_summary(struct f2fs_sb_info *sbi,
nrpages = min(last_offset - i, bio_blocks);
 
/* read ahead node pages */
-   nrpages = ra_sum_pages(sbi, page_list, addr, nrpages);
+   nrpages = ra_sum_pages(sbi, pages, addr, nrpages);
if (!nrpages)
return -ENOMEM;
 
-   list_for_each_entry_safe(page, tmp, page_list, lru) {
+   for (idx = 0; idx < nrpages; idx++) {
if (err)
goto skip;
 
-   lock_page(page);
-   if (unlikely(!PageUptodate(page))) {
+   lock_page(pages[idx]);
+   if (unlikely(!PageUptodate(pages[idx]))) {
err = -EIO;
} else {
-   rn = F2FS_NODE(page);
+   rn = F2FS_NODE(pages[idx]);
	sum_entry->nid = rn->footer.nid;
	sum_entry->version = 0;
	sum_entry->ofs_in_node = 0;
sum_entry++;
}
-   unlock_page(page);
+   

Re: [f2fs-dev] [PATCH v2] f2fs: avoid crash when trace f2fs_submit_page_mbio event in ra_sum_pages

2014-05-26 Thread Changman Lee
Hi, Chao

Could you think about the following as well? Move the node_inode setup in
front of build_segment_manager(), then use node_inode instead of bd_inode.
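
Roughly, the change being suggested would look like the sketch below (untested;
it assumes sbi->node_inode is already valid at this point, which is exactly the
part that would have to move earlier in f2fs_fill_super()):

static int ra_sum_pages(struct f2fs_sb_info *sbi, struct page **pages,
			int start, int nrpages)
{
	/* use the node inode's mapping instead of bd_inode's */
	struct address_space *mapping = sbi->node_inode->i_mapping;
	int i, page_idx = start;
	struct f2fs_io_info fio = {
		.type = META,
		.rw = READ_SYNC | REQ_META | REQ_PRIO
	};

	for (i = 0; page_idx < start + nrpages; page_idx++, i++) {
		pages[i] = grab_cache_page(mapping, page_idx);
		if (!pages[i])
			break;
		f2fs_submit_page_mbio(sbi, pages[i], page_idx, &fio);
	}

	f2fs_submit_merged_bio(sbi, META, READ);
	return i;
}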

On Tue, May 27, 2014 at 08:41:07AM +0800, Chao Yu wrote:
 Previously we allocated pages with no mapping in ra_sum_pages(), so we could
 crash in the f2fs_submit_page_mbio trace event, which accesses the page's
 mapping data.
 
 We'd better allocate the pages in bd_inode's mapping and invalidate them after
 we restore data from them. That avoids the crash in the above scenario.
 
 Changes from V1
  o remove redundant code in ra_sum_pages() suggested by Jaegeuk Kim.
 
 Call Trace:
  [f1031630] ? ftrace_raw_event_f2fs_write_checkpoint+0x80/0x80 [f2fs]
  [f10377bb] f2fs_submit_page_mbio+0x1cb/0x200 [f2fs]
  [f103c5da] restore_node_summary+0x13a/0x280 [f2fs]
  [f103e22d] build_curseg+0x2bd/0x620 [f2fs]
  [f104043b] build_segment_manager+0x1cb/0x920 [f2fs]
  [f1032c85] f2fs_fill_super+0x535/0x8e0 [f2fs]
  [c115b66a] mount_bdev+0x16a/0x1a0
  [f102f63f] f2fs_mount+0x1f/0x30 [f2fs]
  [c115c096] mount_fs+0x36/0x170
  [c1173635] vfs_kern_mount+0x55/0xe0
  [c1175388] do_mount+0x1e8/0x900
  [c1175d72] SyS_mount+0x82/0xc0
  [c16059cc] sysenter_do_call+0x12/0x22
 
 Suggested-by: Jaegeuk Kim jaegeuk@samsung.com
 Signed-off-by: Chao Yu chao2...@samsung.com
 ---
  fs/f2fs/node.c |   52 
  1 file changed, 24 insertions(+), 28 deletions(-)
 
 diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
 index 3d60d3d..02a59e9 100644
 --- a/fs/f2fs/node.c
 +++ b/fs/f2fs/node.c
 @@ -1658,35 +1658,29 @@ int recover_inode_page(struct f2fs_sb_info *sbi, 
 struct page *page)
  
  /*
   * ra_sum_pages() merge contiguous pages into one bio and submit.
 - * these pre-readed pages are linked in pages list.
 + * these pre-readed pages are alloced in bd_inode's mapping tree.
   */
 -static int ra_sum_pages(struct f2fs_sb_info *sbi, struct list_head *pages,
 +static int ra_sum_pages(struct f2fs_sb_info *sbi, struct page **pages,
   int start, int nrpages)
  {
 - struct page *page;
 - int page_idx = start;
  + struct inode *inode = sbi->sb->s_bdev->bd_inode;
  + struct address_space *mapping = inode->i_mapping;
 + int i, page_idx = start;
   struct f2fs_io_info fio = {
   .type = META,
   .rw = READ_SYNC | REQ_META | REQ_PRIO
   };
  
  - for (; page_idx < start + nrpages; page_idx++) {
 - /* alloc temporal page for read node summary info*/
 - page = alloc_page(GFP_F2FS_ZERO);
 - if (!page)
  + for (i = 0; page_idx < start + nrpages; page_idx++, i++) {
 + /* alloc page in bd_inode for reading node summary info */
 + pages[i] = grab_cache_page(mapping, page_idx);
 + if (!pages[i])
   break;
 -
 - lock_page(page);
  - page->index = page_idx;
  - list_add_tail(&page->lru, pages);
  + f2fs_submit_page_mbio(sbi, pages[i], page_idx, &fio);
   }
  
 - list_for_each_entry(page, pages, lru)
  - f2fs_submit_page_mbio(sbi, page, page->index, &fio);
 -
   f2fs_submit_merged_bio(sbi, META, READ);
 -
 - return page_idx - start;
 + return i;
  }
  
  int restore_node_summary(struct f2fs_sb_info *sbi,
 @@ -1694,11 +1688,11 @@ int restore_node_summary(struct f2fs_sb_info *sbi,
  {
   struct f2fs_node *rn;
   struct f2fs_summary *sum_entry;
 - struct page *page, *tmp;
  + struct inode *inode = sbi->sb->s_bdev->bd_inode;
   block_t addr;
   int bio_blocks = MAX_BIO_BLOCKS(max_hw_blocks(sbi));
 - int i, last_offset, nrpages, err = 0;
 - LIST_HEAD(page_list);
 + struct page *pages[bio_blocks];
 + int i, idx, last_offset, nrpages, err = 0;
  
   /* scan the node segment */
    last_offset = sbi->blocks_per_seg;
 @@ -1709,29 +1703,31 @@ int restore_node_summary(struct f2fs_sb_info *sbi,
   nrpages = min(last_offset - i, bio_blocks);
  
   /* read ahead node pages */
 - nrpages = ra_sum_pages(sbi, page_list, addr, nrpages);
 + nrpages = ra_sum_pages(sbi, pages, addr, nrpages);
   if (!nrpages)
   return -ENOMEM;
  
 - list_for_each_entry_safe(page, tmp, page_list, lru) {
  + for (idx = 0; idx < nrpages; idx++) {
   if (err)
   goto skip;
  
 - lock_page(page);
 - if (unlikely(!PageUptodate(page))) {
 + lock_page(pages[idx]);
 + if (unlikely(!PageUptodate(pages[idx]))) {
   err = -EIO;
   } else {
 - rn = F2FS_NODE(page);
 + rn = F2FS_NODE(pages[idx]);
    sum_entry->nid = rn->footer.nid;
    sum_entry->version = 0;