[f2fs-dev] [PATCH] fsck.f2fs: supply more check entries for checkpoint

2018-08-02 Thread Junling Zheng
Supply some more check entries for checkpoint in sanity_check_ckpt()
and validate_checkpoint() to sync them with kernel.

Signed-off-by: Junling Zheng 
---
 fsck/mount.c  | 69 +--
 include/f2fs_fs.h |  1 +
 2 files changed, 68 insertions(+), 2 deletions(-)

diff --git a/fsck/mount.c b/fsck/mount.c
index 8fb4d59..2c2473d 100644
--- a/fsck/mount.c
+++ b/fsck/mount.c
@@ -747,6 +747,9 @@ void *validate_checkpoint(struct f2fs_sb_info *sbi, block_t 
cp_addr,
if (f2fs_crc_valid(crc, cp, crc_offset))
goto invalid_cp1;
 
+   if (get_cp(cp_pack_total_block_count) > sbi->blocks_per_seg)
+   goto invalid_cp1;
+
pre_version = get_cp(checkpoint_ver);
 
/* Read the 2nd cp block in this CP pack */
@@ -867,17 +870,79 @@ int sanity_check_ckpt(struct f2fs_sb_info *sbi)
unsigned int total, fsmeta;
struct f2fs_super_block *sb = F2FS_RAW_SUPER(sbi);
struct f2fs_checkpoint *cp = F2FS_CKPT(sbi);
+   unsigned int ovp_segments, reserved_segments;
+   unsigned int main_segs, blocks_per_seg;
+   unsigned int sit_segs, nat_segs;
+   unsigned int sit_bitmap_size, nat_bitmap_size;
+   unsigned int log_blocks_per_seg;
+   unsigned int segment_count_main;
+   unsigned int cp_pack_start_sum, cp_payload;
+   block_t user_block_count;
+   int i;
 
total = get_sb(segment_count);
fsmeta = get_sb(segment_count_ckpt);
-   fsmeta += get_sb(segment_count_sit);
-   fsmeta += get_sb(segment_count_nat);
+   sit_segs = get_sb(segment_count_sit);
+   fsmeta += sit_segs;
+   nat_segs = get_sb(segment_count_nat);
+   fsmeta += nat_segs;
fsmeta += get_cp(rsvd_segment_count);
fsmeta += get_sb(segment_count_ssa);
 
if (fsmeta >= total)
return 1;
 
+   ovp_segments = get_cp(overprov_segment_count);
+   reserved_segments = get_cp(rsvd_segment_count);
+
+   if (fsmeta < F2FS_MIN_SEGMENT || ovp_segments == 0 ||
+   reserved_segments == 0) {
+   MSG(0, "\tWrong layout: check mkfs.f2fs version\n");
+   return 1;
+   }
+
+   user_block_count = get_cp(user_block_count);
+   segment_count_main = get_sb(segment_count_main);
+   log_blocks_per_seg = get_sb(log_blocks_per_seg);
+   if (!user_block_count || user_block_count >=
+   segment_count_main << log_blocks_per_seg) {
+   MSG(0, "\tWrong user_block_count(%u)\n", user_block_count);
+   return 1;
+   }
+
+   main_segs = get_sb(segment_count_main);
+   blocks_per_seg = sbi->blocks_per_seg;
+
+   for (i = 0; i < NR_CURSEG_NODE_TYPE; i++) {
+   if (get_cp(cur_node_segno[i]) >= main_segs ||
+   get_cp(cur_node_blkoff[i]) >= blocks_per_seg)
+   return 1;
+   }
+   for (i = 0; i < NR_CURSEG_DATA_TYPE; i++) {
+   if (get_cp(cur_data_segno[i]) >= main_segs ||
+   get_cp(cur_data_blkoff[i]) >= blocks_per_seg)
+   return 1;
+   }
+
+   sit_bitmap_size = get_cp(sit_ver_bitmap_bytesize);
+   nat_bitmap_size = get_cp(nat_ver_bitmap_bytesize);
+
+   if (sit_bitmap_size != ((sit_segs / 2) << log_blocks_per_seg) / 8 ||
+   nat_bitmap_size != ((nat_segs / 2) << log_blocks_per_seg) / 8) {
+   MSG(0, "\tWrong bitmap size: sit(%u), nat(%u)\n",
+   sit_bitmap_size, nat_bitmap_size);
+   return 1;
+   }
+
+   cp_pack_start_sum = __start_sum_addr(sbi);
+   cp_payload = get_sb(cp_payload);
+   if (cp_pack_start_sum < cp_payload + 1 ||
+   cp_pack_start_sum > blocks_per_seg - 1 -
+   NR_CURSEG_TYPE) {
+   MSG(0, "\tWrong cp_pack_start_sum(%u)\n", cp_pack_start_sum);
+   return 1;
+   }
+
return 0;
 }
 
diff --git a/include/f2fs_fs.h b/include/f2fs_fs.h
index 53fa002..2c086a9 100644
--- a/include/f2fs_fs.h
+++ b/include/f2fs_fs.h
@@ -888,6 +888,7 @@ struct f2fs_nat_block {
  * F2FS uses 4 bytes to represent block address. As a result, supported size of
  * disk is 16 TB and it equals to 16 * 1024 * 1024 / 2 segments.
  */
+#define F2FS_MIN_SEGMENT  9 /* SB + 2 (CP + SIT + NAT) + SSA + MAIN */
 #define F2FS_MAX_SEGMENT   ((16 * 1024 * 1024) / 2)
 #define MAX_SIT_BITMAP_SIZE(SEG_ALIGN(SIZE_ALIGN(F2FS_MAX_SEGMENT, \
SIT_ENTRY_PER_BLOCK)) * \
-- 
2.18.0


--
Check out the vibrant tech community on one of the world's most
engaging tech sites, Slashdot.org! http://sdm.link/slashdot
___
Linux-f2fs-devel mailing list
Linux-f2fs-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel

[f2fs-dev] [PATCH v7 1/2] f2fs: fix to avoid broken of dnode block list

2018-08-02 Thread Chao Yu
From: Chao Yu 

The f2fs recovery flow relies on the dnode block linked list: recovery of an
fsynced file depends on the previous dnodes in the list having been persisted.
So during fsync() we should wait for all dnodes of regular inodes to be written
back before issuing the flush.

This way, we can avoid the dnode block list being broken by out-of-order
IO submission due to the IO scheduler or driver.

Sheng Yong helps to do the test with this patch:

Target:/data (f2fs, -)
64MB / 32768KB / 4KB / 8

1 / PERSIST / Index

Base:
      SEQ-RD(MB/s)  SEQ-WR(MB/s)  RND-RD(IOPS)  RND-WR(IOPS)  Insert(TPS)  Update(TPS)  Delete(TPS)
1     867.82        204.15        41440.03      41370.54      680.8        1025.94      1031.08
2     871.87        205.87        41370.3       40275.2       791.14       1065.84      1101.7
3     866.52        205.69        41795.67      40596.16      694.69       1037.16      1031.48
Avg   868.737       205.237       41535.3       40747.3       722.21       1042.98      1054.75

After:
      SEQ-RD(MB/s)  SEQ-WR(MB/s)  RND-RD(IOPS)  RND-WR(IOPS)  Insert(TPS)  Update(TPS)  Delete(TPS)
1     798.81        202.5         41143         40613.87      602.71       838.08       913.83
2     805.79        206.47        40297.2       41291.46      604.44       840.75       924.27
3     814.83        206.17        41209.57      40453.62      602.85       834.66       927.91
Avg   806.477       205.047       40883.25667   40786.31667   603.333      837.83       922.003

Patched/Original:
0.928332713 0.999074239 0.984300676 1.000957528 
0.835398753 0.803303994 0.874141189

It looks like atomic write will suffer a performance regression.

I suspect the culprit is that we are forcing a wait for all dnodes to be in
the storage cache before we issue PREFLUSH+FUA.

BTW, commit ("f2fs: don't need to wait for node writes for atomic write")
will cause the following problem: we will lose the data of the last transaction
after SPO, even if the atomic write returns no error:

- atomic_open();
- write() P1, P2, P3;
- atomic_commit();
 - writeback data: P1, P2, P3;
 - writeback node: N1, N2, N3;  <--- If N1 and N2 are not written back but N3
   (with fsync_mark) is written back, then in SPOR we won't find N3 since the
   node chain is broken, which results in losing the last transaction.
 - preflush + fua;
- power-cut

If we don't wait dnode writeback for atomic_write:

      SEQ-RD(MB/s)  SEQ-WR(MB/s)  RND-RD(IOPS)  RND-WR(IOPS)  Insert(TPS)  Update(TPS)  Delete(TPS)
1     779.91        206.03        41621.5       40333.16      716.9        1038.21      1034.85
2     848.51        204.35        40082.44      39486.17      791.83       1119.96      1083.77
3     772.12        206.27        41335.25      41599.65      723.29       1055.07      971.92
Avg   800.18        205.55        41013.06333   40472.99333   744.007      1071.08      1030.18

Patched/Original:
0.92108464  1.001526693 0.987425886 0.993268102 
1.030180511 1.026942031 0.976702294

SQLite's performance recovers.

Jaegeuk:
"Practically, I don't see db corruption because of this. We can excuse to lose
the last transaction."

Finally, we decided to keep the original semantics of the atomic write
interface: we don't wait for all dnode writeback before preflush+fua submission.

Signed-off-by: Chao Yu 
---
v7:
- fix to export f2fs_wait_on_all_pages_writeback()
 fs/f2fs/checkpoint.c |   8 ++-
 fs/f2fs/data.c   |   2 +
 fs/f2fs/f2fs.h   |  22 ++-
 fs/f2fs/file.c   |   5 +-
 fs/f2fs/node.c   | 144 +++
 fs/f2fs/super.c  |   6 ++
 6 files changed, 156 insertions(+), 31 deletions(-)

diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
index 3587aa53dc90..2cffc37e6c5b 100644
--- a/fs/f2fs/checkpoint.c
+++ b/fs/f2fs/checkpoint.c
@@ -1161,7 +1161,7 @@ static void unblock_operations(struct f2fs_sb_info *sbi)
f2fs_unlock_all(sbi);
 }
 
-static void wait_on_all_pages_writeback(struct f2fs_sb_info *sbi)
+void f2fs_wait_on_all_pages_writeback(struct f2fs_sb_info *sbi)
 {
DEFINE_WAIT(wait);
 
@@ -1397,7 +1397,7 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct 
cp_control *cpc)
f2fs_sync_meta_pages(sbi, META, LONG_MAX, FS_CP_META_IO);
 
/* wait for previous submitted meta pages writeback */
-   wait_on_all_pages_writeback(sbi);
+   f2fs_wait_on_all_pages_writeback(sbi);
 
/* flush all device cache */
err = f2fs_flush_device_cache(sbi);
@@ -1406,7 +1406,7 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct 
cp_control *cpc)
 
/* barrier and flush checkpoint cp pack 2 page if it can */
commit_checkpoint(sbi, ckpt, start_blk);
-   wait_on_all_pages_writeback(sbi);
+   f2f

[f2fs-dev] [PATCH v2] f2fs: fix invalid memory access

2018-08-02 Thread Chao Yu
From: Chao Yu 

syzbot found the following crash on:

HEAD commit:d9bd94c0bcaa Add linux-next specific files for 20180801
git tree:   linux-next
console output: https://syzkaller.appspot.com/x/log.txt?x=1001189c40
kernel config:  https://syzkaller.appspot.com/x/.config?x=cc8964ea4d04518c
dashboard link: https://syzkaller.appspot.com/bug?extid=c966a82db0b14aa37e81
compiler:   gcc (GCC) 8.0.1 20180413 (experimental)

Unfortunately, I don't have any reproducer for this crash yet.

IMPORTANT: if you fix the bug, please add the following tag to the commit:
Reported-by: syzbot+c966a82db0b14aa37...@syzkaller.appspotmail.com

loop7: rw=12288, want=8200, limit=20
netlink: 65342 bytes leftover after parsing attributes in process 
`syz-executor4'.
openvswitch: netlink: Message has 8 unknown bytes.
kasan: CONFIG_KASAN_INLINE enabled
kasan: GPF could be caused by NULL-ptr deref or user memory access
general protection fault:  [#1] SMP KASAN
CPU: 1 PID: 7615 Comm: syz-executor7 Not tainted 4.18.0-rc7-next-20180801+ #29
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 
01/01/2011
RIP: 0010:__read_once_size include/linux/compiler.h:188 [inline]
RIP: 0010:compound_head include/linux/page-flags.h:142 [inline]
RIP: 0010:PageLocked include/linux/page-flags.h:272 [inline]
RIP: 0010:f2fs_put_page fs/f2fs/f2fs.h:2011 [inline]
RIP: 0010:validate_checkpoint+0x66d/0xec0 fs/f2fs/checkpoint.c:835
Code: e8 58 05 7f fe 4c 8d 6b 80 4d 8d 74 24 08 48 b8 00 00 00 00 00 fc ff df 
4c 89 ea 48 c1 ea 03 c6 04 02 00 4c 89 f2 48 c1 ea 03 <80> 3c 02 00 0f 85 f4 06 
00 00 4c 89 ea 4d 8b 7c 24 08 48 b8 00 00
RSP: 0018:8801937cebe8 EFLAGS: 00010246
RAX: dc00 RBX: 8801937cef30 RCX: c90006035000
RDX:  RSI: 82fd9658 RDI: 0005
RBP: 8801937cef58 R08: 8801ab254700 R09: f94000d9e026
R10: f94000d9e026 R11: ea0006cf0137 R12: fffb
R13: 8801937ceeb0 R14: 0003 R15: 880193419b40
FS:  7f36a61d5700() GS:8801db10() knlGS:
CS:  0010 DS:  ES:  CR0: 80050033
CR2: 7fc04ff93000 CR3: 0001d0562000 CR4: 001426e0
DR0:  DR1:  DR2: 
DR3:  DR6: fffe0ff0 DR7: 0400
Call Trace:
 f2fs_get_valid_checkpoint+0x436/0x1ec0 fs/f2fs/checkpoint.c:860
 f2fs_fill_super+0x2d42/0x8110 fs/f2fs/super.c:2883
 mount_bdev+0x314/0x3e0 fs/super.c:1344
 f2fs_mount+0x3c/0x50 fs/f2fs/super.c:3133
 legacy_get_tree+0x131/0x460 fs/fs_context.c:729
 vfs_get_tree+0x1cb/0x5c0 fs/super.c:1743
 do_new_mount fs/namespace.c:2603 [inline]
 do_mount+0x6f2/0x1e20 fs/namespace.c:2927
 ksys_mount+0x12d/0x140 fs/namespace.c:3143
 __do_sys_mount fs/namespace.c:3157 [inline]
 __se_sys_mount fs/namespace.c:3154 [inline]
 __x64_sys_mount+0xbe/0x150 fs/namespace.c:3154
 do_syscall_64+0x1b9/0x820 arch/x86/entry/common.c:290
 entry_SYSCALL_64_after_hwframe+0x49/0xbe
RIP: 0033:0x45943a
Code: b8 a6 00 00 00 0f 05 48 3d 01 f0 ff ff 0f 83 bd 8a fb ff c3 66 2e 0f 1f 
84 00 00 00 00 00 66 90 49 89 ca b8 a5 00 00 00 0f 05 <48> 3d 01 f0 ff ff 0f 83 
9a 8a fb ff c3 66 0f 1f 84 00 00 00 00 00
RSP: 002b:7f36a61d4a88 EFLAGS: 0206 ORIG_RAX: 00a5
RAX: ffda RBX: 7f36a61d4b30 RCX: 0045943a
RDX: 7f36a61d4ad0 RSI: 2100 RDI: 7f36a61d4af0
RBP: 2100 R08: 7f36a61d4b30 R09: 7f36a61d4ad0
R10:  R11: 0206 R12: 0013
R13:  R14: 004c8ea0 R15: 
Modules linked in:
Dumping ftrace buffer:
   (ftrace buffer empty)
---[ end trace bd8550c129352286 ]---
RIP: 0010:__read_once_size include/linux/compiler.h:188 [inline]
RIP: 0010:compound_head include/linux/page-flags.h:142 [inline]
RIP: 0010:PageLocked include/linux/page-flags.h:272 [inline]
RIP: 0010:f2fs_put_page fs/f2fs/f2fs.h:2011 [inline]
RIP: 0010:validate_checkpoint+0x66d/0xec0 fs/f2fs/checkpoint.c:835
Code: e8 58 05 7f fe 4c 8d 6b 80 4d 8d 74 24 08 48 b8 00 00 00 00 00 fc ff df 
4c 89 ea 48 c1 ea 03 c6 04 02 00 4c 89 f2 48 c1 ea 03 <80> 3c 02 00 0f 85 f4 06 
00 00 4c 89 ea 4d 8b 7c 24 08 48 b8 00 00
RSP: 0018:8801937cebe8 EFLAGS: 00010246
RAX: dc00 RBX: 8801937cef30 RCX: c90006035000
RDX:  RSI: 82fd9658 RDI: 0005
netlink: 65342 bytes leftover after parsing attributes in process 
`syz-executor4'.
RBP: 8801937cef58 R08: 8801ab254700 R09: f94000d9e026
openvswitch: netlink: Message has 8 unknown bytes.
R10: f94000d9e026 R11: ea0006cf0137 R12: fffb
R13: 8801937ceeb0 R14: 0003 R15: 880193419b40
FS:  7f36a61d5700() GS:8801db10() knlGS:
CS:  0010 DS:  ES:  CR0: 80050033
CR2: 7fc04ff93000 CR3: 0001d0562000 CR4: 001426e0
DR0:  DR1:  DR2:

Re: [f2fs-dev] [PATCH] f2fs: use true and false for boolean values

2018-08-02 Thread Chao Yu
On 2018/8/2 8:51, Gustavo A. R. Silva wrote:
> Return statements in functions returning bool should use true or false
> instead of an integer value.
> 
> This issue was detected with the help of Coccinelle.
> 
> Signed-off-by: Gustavo A. R. Silva 

Reviewed-by: Chao Yu 

Thanks,

--
Check out the vibrant tech community on one of the world's most
engaging tech sites, Slashdot.org! http://sdm.link/slashdot
___
Linux-f2fs-devel mailing list
Linux-f2fs-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel