[f2fs-dev] [PATCH v7 2/8] libf2fs_zoned: Introduce f2fs_report_zone() helper function

2019-11-13 Thread Shin'ichiro Kawasaki
To prepare for write pointer consistency check by fsck, add
f2fs_report_zone() helper function which calls REPORT ZONE command to
get write pointer status of a single zone. The function is added to
lib/libf2fs_zoned which gathers zoned block device related functions.

Signed-off-by: Shin'ichiro Kawasaki 
Reviewed-by: Chao Yu 
Signed-off-by: Jaegeuk Kim 
---
 include/f2fs_fs.h   |  1 +
 lib/libf2fs_zoned.c | 33 +
 2 files changed, 34 insertions(+)

diff --git a/include/f2fs_fs.h b/include/f2fs_fs.h
index fe18dff..d6ea688 100644
--- a/include/f2fs_fs.h
+++ b/include/f2fs_fs.h
@@ -1319,6 +1319,7 @@ blk_zone_cond_str(struct blk_zone *blkz)
 
 extern int f2fs_get_zoned_model(int);
 extern int f2fs_get_zone_blocks(int);
+extern int f2fs_report_zone(int, u_int64_t, void *);
 typedef int (report_zones_cb_t)(int i, void *, void *);
 extern int f2fs_report_zones(int, report_zones_cb_t *, void *);
 extern int f2fs_check_zones(int);
diff --git a/lib/libf2fs_zoned.c b/lib/libf2fs_zoned.c
index 8ad4171..5328c56 100644
--- a/lib/libf2fs_zoned.c
+++ b/lib/libf2fs_zoned.c
@@ -191,6 +191,33 @@ int f2fs_get_zone_blocks(int i)
return 0;
 }
 
+int f2fs_report_zone(int i, u_int64_t sector, void *blkzone)
+{
+   struct blk_zone *blkz = (struct blk_zone *)blkzone;
+   struct blk_zone_report *rep;
+   int ret = -1;
+
+   rep = malloc(sizeof(struct blk_zone_report) + sizeof(struct blk_zone));
+   if (!rep) {
+   ERR_MSG("No memory for report zones\n");
+   return -ENOMEM;
+   }
+
+   rep->sector = sector;
+   rep->nr_zones = 1;
+   ret = ioctl(c.devices[i].fd, BLKREPORTZONE, rep);
+   if (ret != 0) {
+   ret = -errno;
+   ERR_MSG("ioctl BLKREPORTZONE failed: errno=%d\n", errno);
+   goto out;
+   }
+
+   *blkz = *(struct blk_zone *)(rep + 1);
+out:
+   free(rep);
+   return ret;
+}
+
 #define F2FS_REPORT_ZONES_BUFSZ	524288
 
 int f2fs_report_zones(int j, report_zones_cb_t *report_zones_cb, void *opaque)
@@ -425,6 +452,12 @@ out:
 
 #else
 
+int f2fs_report_zone(int i, u_int64_t sector, void *blkzone)
+{
+   ERR_MSG("%d: Unsupported zoned block device\n", i);
+   return -1;
+}
+
 int f2fs_report_zones(int i, report_zones_cb_t *report_zones_cb, void *opaque)
 {
ERR_MSG("%d: Unsupported zoned block device\n", i);
-- 
2.23.0



___
Linux-f2fs-devel mailing list
Linux-f2fs-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel


[f2fs-dev] [PATCH v7 4/8] fsck: Find free zones instead of blocks to assign to current segments

2019-11-13 Thread Shin'ichiro Kawasaki
When fsck needs to assign a new area to a current segment, it calls
find_next_free_block() function to find a new block to assign. For zoned
block devices, fsck checks write pointer consistency with current
segments' positions. In case a curseg is inconsistent with the
write pointer of the zone it points to, fsck should assign not a new free
block but a new free zone/section with write pointer at the zone start,
so that next write to the current segment succeeds without error.

To extend find_next_free_block() function's capability to find not only
a block but also a zone/section, add new_sec flag to
find_next_free_block() function. When new_sec flag is true, skip check
for each block's availability so that the check is done with unit of
section. Note that it is ensured that one zone has one section for f2fs
on zoned block devices. Then the logic to find a new free section is good
to find a new free zone.

When fsck target devices have ZONED_HM model, set new_sec flag true to
call find_next_free_block() from move_curseg_info(). Set curseg's
alloc_type not SSR but LFS for the devices with ZONED_HM model, because
SSR block allocation is not allowed for zoned block devices. Also skip
relocate_curseg_offset() for the devices with ZONED_HM model for the
same reason.

Signed-off-by: Shin'ichiro Kawasaki 
Reviewed-by: Chao Yu 
Signed-off-by: Jaegeuk Kim 
---
 fsck/defrag.c  |  2 +-
 fsck/f2fs.h|  1 +
 fsck/fsck.h|  2 +-
 fsck/mount.c   | 13 +
 fsck/segment.c |  2 +-
 5 files changed, 13 insertions(+), 7 deletions(-)

diff --git a/fsck/defrag.c b/fsck/defrag.c
index fc6b7cf..3473637 100644
--- a/fsck/defrag.c
+++ b/fsck/defrag.c
@@ -77,7 +77,7 @@ int f2fs_defragment(struct f2fs_sb_info *sbi, u64 from, u64 
len, u64 to, int lef
if (!f2fs_test_bit(offset, (const char *)se->cur_valid_map))
continue;
 
-   if (find_next_free_block(sbi, , left, se->type)) {
+   if (find_next_free_block(sbi, , left, se->type, false)) {
MSG(0, "Not enough space to migrate blocks");
return -1;
}
diff --git a/fsck/f2fs.h b/fsck/f2fs.h
index 399c74d..59d2cc8 100644
--- a/fsck/f2fs.h
+++ b/fsck/f2fs.h
@@ -14,6 +14,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
diff --git a/fsck/fsck.h b/fsck/fsck.h
index ccf4a39..8da0ebb 100644
--- a/fsck/fsck.h
+++ b/fsck/fsck.h
@@ -191,7 +191,7 @@ extern void zero_journal_entries(struct f2fs_sb_info *);
 extern void flush_sit_entries(struct f2fs_sb_info *);
 extern void move_curseg_info(struct f2fs_sb_info *, u64, int);
 extern void write_curseg_info(struct f2fs_sb_info *);
-extern int find_next_free_block(struct f2fs_sb_info *, u64 *, int, int);
+extern int find_next_free_block(struct f2fs_sb_info *, u64 *, int, int, bool);
 extern void duplicate_checkpoint(struct f2fs_sb_info *);
 extern void write_checkpoint(struct f2fs_sb_info *);
 extern void write_checkpoints(struct f2fs_sb_info *);
diff --git a/fsck/mount.c b/fsck/mount.c
index 7d268e3..7ce885c 100644
--- a/fsck/mount.c
+++ b/fsck/mount.c
@@ -2445,6 +2445,9 @@ int relocate_curseg_offset(struct f2fs_sb_info *sbi, int 
type)
struct seg_entry *se = get_seg_entry(sbi, curseg->segno);
unsigned int i;
 
+   if (c.zoned_model == F2FS_ZONED_HM)
+   return -EINVAL;
+
for (i = 0; i < sbi->blocks_per_seg; i++) {
if (!f2fs_test_bit(i, (const char *)se->cur_valid_map))
break;
@@ -2477,7 +2480,8 @@ void set_section_type(struct f2fs_sb_info *sbi, unsigned 
int segno, int type)
}
 }
 
-int find_next_free_block(struct f2fs_sb_info *sbi, u64 *to, int left, int 
want_type)
+int find_next_free_block(struct f2fs_sb_info *sbi, u64 *to, int left,
+   int want_type, bool new_sec)
 {
struct f2fs_super_block *sb = F2FS_RAW_SUPER(sbi);
struct seg_entry *se;
@@ -2535,7 +2539,7 @@ int find_next_free_block(struct f2fs_sb_info *sbi, u64 
*to, int left, int want_t
}
}
 
-   if (type == want_type &&
+   if (type == want_type && !new_sec &&
!f2fs_test_bit(offset, (const char *)bitmap))
return 0;
 
@@ -2561,13 +2565,14 @@ void move_curseg_info(struct f2fs_sb_info *sbi, u64 
from, int left)
ASSERT(ret >= 0);
 
to = from;
-   ret = find_next_free_block(sbi, &to, left, i);
+   ret = find_next_free_block(sbi, &to, left, i,
+  c.zoned_model == F2FS_ZONED_HM);
ASSERT(ret == 0);
 
old_segno = curseg->segno;
curseg->segno = GET_SEGNO(sbi, to);
curseg->next_blkoff = OFFSET_IN_SEG(sbi, to);
-   curseg->alloc_type = SSR;
+   curseg->alloc_type = c.zoned_model == F2FS_ZONED_HM ? LFS : 

[f2fs-dev] [PATCH v7 0/8] fsck: Check write pointers of zoned block devices

2019-11-13 Thread Shin'ichiro Kawasaki
On sudden f2fs shutdown, zoned block device status and f2fs meta data can be
inconsistent. When f2fs shutdown happens during write operations, write pointers
on the device go forward but the f2fs meta data does not reflect the write
pointer progress. This inconsistency will eventually cause "Unaligned write
command" error when restarting write operation after the next mount.

This error is observed with xfstests test case generic/388, which enforces
sudden shutdown during write operation and checks the file system recovery.

This patch series adds a feature to fsck.f2fs to check and fix the
inconsistency. Per discussion on the list, implement two checks. The first check
is for open zones that current segments point to. Check write pointer
consistency with current segment positions recorded in CP, and if they are
inconsistent, assign a new zone to the current segment. The second check is for
non-open zones that current segments do not point to. Check write pointer
consistency with valid block maps recorded in SIT.

Reflect fsync data blocks in these checks. If fsync data exists and current
segments point to zones with fsync data, keep the fsync data and the current
segments untouched so that kernel can recover the fsync data. Another patch
series for kernel is being posted to check and fix write pointer consistency
after fsync data recovery.

Have fsck check and fix the consistency twice. The first fix is at the beginning
of fsck so that writes by fsck for fixes do not fail with unaligned write command
error. The second fix is at the end of the fsck to reflect SIT valid block maps
updates by fsck.

The first three patches add three helper functions to call report zone and reset
zone commands to zoned block devices. Next three patches modify existing fsck
functions to meet zoned block devices' requirements. The last two patches add
the two checks for write pointer consistency.

Thanks go to Chao Yu for the detailed discussion on the list.


For v7, rebased to dev-test branch. The patches from 1st to 6th in this series
are taken from the dev-test branch, which reflects Jaegeuk's updates and Signed-
off-by tags (Thanks Jaegeuk!). It will be appreciated to check 7th and 8th
patches.

Changes from v6:
* 1st-6th patches: Taken from dev-test branch, Jaegeuk's updates and sign off
* 7th patch: Reversed bitmap search order and improved last valid block check
* 8th patch: Improved code comment

Changes from v5:
* 1st-3rd patch: Reflected review comments on helper functions
* 8th patch: Ensure zones are in main segments and removed errno print

Changes from v4:
* Renewed the series based on design discussion on the list

Changes from v3:
* Set curseg position at a new zone start when its write pointer is at zone end

Changes from v2:
* Reflected review comments by Chao Yu
* Dropped 4th patch and 2nd patch (2nd patch was required for the 4th patch)

Changes from v1:
* Fixed build failure on dev branch

Shin'ichiro Kawasaki (8):
  libf2fs_zoned: Introduce f2fs_report_zones() helper function
  libf2fs_zoned: Introduce f2fs_report_zone() helper function
  libf2fs_zoned: Introduce f2fs_reset_zone() helper function
  fsck: Find free zones instead of blocks to assign to current segments
  fsck: Introduce move_one_curseg_info() function
  fsck: Check fsync data always for zoned block devices
  fsck: Check write pointer consistency of open zones
  fsck: Check write pointer consistency of non-open zones

 fsck/defrag.c   |   2 +-
 fsck/f2fs.h |   6 +
 fsck/fsck.c | 271 
 fsck/fsck.h |  11 +-
 fsck/main.c |   2 +
 fsck/mount.c| 139 ---
 fsck/segment.c  |   4 +-
 include/f2fs_fs.h   |   7 ++
 lib/libf2fs_zoned.c | 124 +++-
 9 files changed, 520 insertions(+), 46 deletions(-)

-- 
2.23.0



___
Linux-f2fs-devel mailing list
Linux-f2fs-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel


[f2fs-dev] [PATCH v7 5/8] fsck: Introduce move_one_curseg_info() function

2019-11-13 Thread Shin'ichiro Kawasaki
When fsck updates one of the current segments, update_curseg_info() is
called specifying a single current segment as its argument. However,
update_curseg_info() calls move_curseg_info() function which updates all
six current segments. Then update_curseg_info() for a single current
segment moves all current segments.

This excessive current segment move causes an issue when a new zone is
assigned to a current segment because of write pointer inconsistency.
Even when a current segment has write pointer inconsistency, all other
current segments should not be moved because they may have fsync data
at their positions.

To avoid the excessive current segment move, introduce
move_one_curseg_info() function which does same work as
move_curseg_info() only for a single current segment. Call
move_one_curseg_info() in place of move_curseg_info() from
update_curseg_info().

Signed-off-by: Shin'ichiro Kawasaki 
Reviewed-by: Chao Yu 
Signed-off-by: Jaegeuk Kim 
---
 fsck/mount.c | 68 
 1 file changed, 37 insertions(+), 31 deletions(-)

diff --git a/fsck/mount.c b/fsck/mount.c
index 7ce885c..cd6b51b 100644
--- a/fsck/mount.c
+++ b/fsck/mount.c
@@ -2548,52 +2548,58 @@ int find_next_free_block(struct f2fs_sb_info *sbi, u64 
*to, int left,
return -1;
 }
 
-void move_curseg_info(struct f2fs_sb_info *sbi, u64 from, int left)
+static void move_one_curseg_info(struct f2fs_sb_info *sbi, u64 from, int left,
+int i)
 {
-   int i, ret;
+   struct curseg_info *curseg = CURSEG_I(sbi, i);
+   struct f2fs_summary_block buf;
+   u32 old_segno;
+   u64 ssa_blk, to;
+   int ret;
 
-   /* update summary blocks having nullified journal entries */
-   for (i = 0; i < NO_CHECK_TYPE; i++) {
-   struct curseg_info *curseg = CURSEG_I(sbi, i);
-   struct f2fs_summary_block buf;
-   u32 old_segno;
-   u64 ssa_blk, to;
+   /* update original SSA too */
+   ssa_blk = GET_SUM_BLKADDR(sbi, curseg->segno);
+   ret = dev_write_block(curseg->sum_blk, ssa_blk);
+   ASSERT(ret >= 0);
 
-   /* update original SSA too */
-   ssa_blk = GET_SUM_BLKADDR(sbi, curseg->segno);
-   ret = dev_write_block(curseg->sum_blk, ssa_blk);
-   ASSERT(ret >= 0);
+   to = from;
+   ret = find_next_free_block(sbi, &to, left, i,
+  c.zoned_model == F2FS_ZONED_HM);
+   ASSERT(ret == 0);
 
-   to = from;
-   ret = find_next_free_block(sbi, &to, left, i,
-  c.zoned_model == F2FS_ZONED_HM);
-   ASSERT(ret == 0);
+   old_segno = curseg->segno;
+   curseg->segno = GET_SEGNO(sbi, to);
+   curseg->next_blkoff = OFFSET_IN_SEG(sbi, to);
+   curseg->alloc_type = c.zoned_model == F2FS_ZONED_HM ? LFS : SSR;
 
-   old_segno = curseg->segno;
-   curseg->segno = GET_SEGNO(sbi, to);
-   curseg->next_blkoff = OFFSET_IN_SEG(sbi, to);
-   curseg->alloc_type = c.zoned_model == F2FS_ZONED_HM ? LFS : SSR;
+   /* update new segno */
+   ssa_blk = GET_SUM_BLKADDR(sbi, curseg->segno);
+   ret = dev_read_block(&buf, ssa_blk);
+   ASSERT(ret >= 0);
 
-   /* update new segno */
-   ssa_blk = GET_SUM_BLKADDR(sbi, curseg->segno);
-   ret = dev_read_block(&buf, ssa_blk);
-   ASSERT(ret >= 0);
+   memcpy(curseg->sum_blk, &buf, SUM_ENTRIES_SIZE);
 
-   memcpy(curseg->sum_blk, &buf, SUM_ENTRIES_SIZE);
+   /* update se->types */
+   reset_curseg(sbi, i);
 
-   /* update se->types */
-   reset_curseg(sbi, i);
+   FIX_MSG("Move curseg[%d] %x -> %x after %"PRIx64"\n",
+   i, old_segno, curseg->segno, from);
+}
 
-   DBG(1, "Move curseg[%d] %x -> %x after %"PRIx64"\n",
-   i, old_segno, curseg->segno, from);
-   }
+void move_curseg_info(struct f2fs_sb_info *sbi, u64 from, int left)
+{
+   int i;
+
+   /* update summary blocks having nullified journal entries */
+   for (i = 0; i < NO_CHECK_TYPE; i++)
+   move_one_curseg_info(sbi, from, left, i);
 }
 
 void update_curseg_info(struct f2fs_sb_info *sbi, int type)
 {
if (!relocate_curseg_offset(sbi, type))
return;
-   move_curseg_info(sbi, SM_I(sbi)->main_blkaddr, 0);
+   move_one_curseg_info(sbi, SM_I(sbi)->main_blkaddr, 0, type);
 }
 
 void zero_journal_entries(struct f2fs_sb_info *sbi)
-- 
2.23.0



___
Linux-f2fs-devel mailing list
Linux-f2fs-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel


[f2fs-dev] [PATCH v7 3/8] libf2fs_zoned: Introduce f2fs_reset_zone() helper function

2019-11-13 Thread Shin'ichiro Kawasaki
To prepare for write pointer consistency fix by fsck, add
f2fs_reset_zone() helper function which calls RESET ZONE command. The
function is added to lib/libf2fs_zoned which gathers zoned block device
related functions.

When f2fs-tools are built without blkzoned.h kernel header, the helper
function f2fs_reset_zone() prints an error message as other helper
functions in lib/libf2fs_zoned print. To make the message consistent
across all the helper functions, modify message strings in
f2fs_check_zones() and f2fs_reset_zones().

Signed-off-by: Shin'ichiro Kawasaki 
Signed-off-by: Jaegeuk Kim 
---
 include/f2fs_fs.h   |  1 +
 lib/libf2fs_zoned.c | 32 ++--
 2 files changed, 31 insertions(+), 2 deletions(-)

diff --git a/include/f2fs_fs.h b/include/f2fs_fs.h
index d6ea688..9c26314 100644
--- a/include/f2fs_fs.h
+++ b/include/f2fs_fs.h
@@ -1323,6 +1323,7 @@ extern int f2fs_report_zone(int, u_int64_t, void *);
 typedef int (report_zones_cb_t)(int i, void *, void *);
 extern int f2fs_report_zones(int, report_zones_cb_t *, void *);
 extern int f2fs_check_zones(int);
+int f2fs_reset_zone(int, void *);
 extern int f2fs_reset_zones(int);
 
 #define SIZE_ALIGN(val, size)  ((val) + (size) - 1) / (size)
diff --git a/lib/libf2fs_zoned.c b/lib/libf2fs_zoned.c
index 5328c56..8b88fe9 100644
--- a/lib/libf2fs_zoned.c
+++ b/lib/libf2fs_zoned.c
@@ -388,6 +388,28 @@ out:
return ret;
 }
 
+int f2fs_reset_zone(int i, void *blkzone)
+{
+   struct blk_zone *blkz = (struct blk_zone *)blkzone;
+   struct device_info *dev = c.devices + i;
+   struct blk_zone_range range;
+   int ret;
+
+   if (!blk_zone_seq(blkz) || blk_zone_empty(blkz))
+   return 0;
+
+   /* Non empty sequential zone: reset */
+   range.sector = blk_zone_sector(blkz);
+   range.nr_sectors = blk_zone_length(blkz);
+   ret = ioctl(dev->fd, BLKRESETZONE, &range);
+   if (ret != 0) {
+   ret = -errno;
+   ERR_MSG("ioctl BLKRESETZONE failed: errno=%d\n", errno);
+   }
+
+   return ret;
+}
+
 int f2fs_reset_zones(int j)
 {
struct device_info *dev = c.devices + j;
@@ -487,13 +509,19 @@ int f2fs_get_zone_blocks(int i)
 
 int f2fs_check_zones(int i)
 {
-   ERR_MSG("%d: Zoned block devices are not supported\n", i);
+   ERR_MSG("%d: Unsupported zoned block device\n", i);
+   return -1;
+}
+
+int f2fs_reset_zone(int i, void *blkzone)
+{
+   ERR_MSG("%d: Unsupported zoned block device\n", i);
return -1;
 }
 
 int f2fs_reset_zones(int i)
 {
-   ERR_MSG("%d: Zoned block devices are not supported\n", i);
+   ERR_MSG("%d: Unsupported zoned block device\n", i);
return -1;
 }
 
-- 
2.23.0



___
Linux-f2fs-devel mailing list
Linux-f2fs-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel


[f2fs-dev] [PATCH v7 7/8] fsck: Check write pointer consistency of open zones

2019-11-13 Thread Shin'ichiro Kawasaki
On sudden f2fs shutdown, write pointers of zoned block devices can go
further but f2fs meta data keeps current segments at positions before the
write operations. After remounting the f2fs, this inconsistency causes
write operations not at write pointers and "Unaligned write command"
error is reported.

To avoid the error, have fsck.f2fs check consistency of write pointers
of open zones that current segments point to. Compare each current
segment's position and the write pointer position of the open zone. If
inconsistency is found and 'fix_on' flag is set, assign a new zone to the
current segment and check the newly assigned zone has write pointer at
the zone start. Leave the original zone as is to keep data recorded in
it.

To care about fsync data, refer each seg_entry's ckpt_valid_map to get
the last valid block in the zone. If the last valid block is beyond the
current segment's position, fsync data exists in the zone. In case fsync
data exists, do not assign a new zone to the current segment so as not to lose
the fsync data. It is expected that the kernel replays the fsync data and
fixes the write pointer inconsistency at mount time.

Also check consistency between write pointer of the zone the current
segment points to with valid block maps of the zone. If the last valid
block is beyond the write pointer position, report to indicate a bug. If
'fix_on' flag is set, assign a new zone to the current segment.

When inconsistencies are found, turn on 'bug_on' flag in fsck_verify() to
ask users to fix them or not. When inconsistencies get fixed, turn on
'force' flag in fsck_verify() to enforce fixes in following checks.

This check and fix is done twice. The first is done at the beginning of
do_fsck() function so that other fixes can reflect the current segment
modification. The second is done in fsck_verify() to reflect updated meta
data by other fixes.

Signed-off-by: Shin'ichiro Kawasaki 
---
 fsck/f2fs.h  |   5 ++
 fsck/fsck.c  | 152 +++
 fsck/fsck.h  |   3 +
 fsck/main.c  |   2 +
 fsck/mount.c |  50 -
 5 files changed, 211 insertions(+), 1 deletion(-)

diff --git a/fsck/f2fs.h b/fsck/f2fs.h
index 59d2cc8..55d6b07 100644
--- a/fsck/f2fs.h
+++ b/fsck/f2fs.h
@@ -430,6 +430,11 @@ static inline block_t __end_block_addr(struct f2fs_sb_info 
*sbi)
 #define GET_BLKOFF_FROM_SEG0(sbi, blk_addr)\
(GET_SEGOFF_FROM_SEG0(sbi, blk_addr) & (sbi->blocks_per_seg - 1))
 
+#define GET_SEC_FROM_SEG(sbi, segno)   \
+   ((segno) / (sbi)->segs_per_sec)
+#define GET_SEG_FROM_SEC(sbi, secno)   \
+   ((secno) * (sbi)->segs_per_sec)
+
 #define FREE_I_START_SEGNO(sbi)
\
GET_SEGNO_FROM_SEG0(sbi, SM_I(sbi)->main_blkaddr)
 #define GET_R2L_SEGNO(sbi, segno)  (segno + FREE_I_START_SEGNO(sbi))
diff --git a/fsck/fsck.c b/fsck/fsck.c
index 2ae3bd5..37ac30a 100644
--- a/fsck/fsck.c
+++ b/fsck/fsck.c
@@ -2181,6 +2181,123 @@ static void fix_checkpoints(struct f2fs_sb_info *sbi)
fix_checkpoint(sbi);
 }
 
+#ifdef HAVE_LINUX_BLKZONED_H
+
+/*
+ * Refer valid block map and return offset of the last valid block in the zone.
+ * Obtain valid block map from SIT and fsync data.
+ * If there is no valid block in the zone, return -1.
+ */
+static int last_vblk_off_in_zone(struct f2fs_sb_info *sbi,
+unsigned int zone_segno)
+{
+   int s, b;
+   unsigned int segs_per_zone = sbi->segs_per_sec * sbi->secs_per_zone;
+   struct seg_entry *se;
+
+   for (s = segs_per_zone - 1; s >= 0; s--) {
+   se = get_seg_entry(sbi, zone_segno + s);
+
+   /*
+* Refer not cur_valid_map but ckpt_valid_map which reflects
+* fsync data.
+*/
+   ASSERT(se->ckpt_valid_map);
+   for (b = sbi->blocks_per_seg - 1; b >= 0; b--)
+   if (f2fs_test_bit(b, (const char*)se->ckpt_valid_map))
+   return b + (s << sbi->log_blocks_per_seg);
+   }
+
+   return -1;
+}
+
+static int check_curseg_write_pointer(struct f2fs_sb_info *sbi, int type)
+{
+   struct curseg_info *curseg = CURSEG_I(sbi, type);
+   struct f2fs_fsck *fsck = F2FS_FSCK(sbi);
+   struct blk_zone blkz;
+   block_t cs_block, wp_block, zone_last_vblock;
+   u_int64_t cs_sector, wp_sector;
+   int i, ret;
+   unsigned int zone_segno;
+   int log_sectors_per_block = sbi->log_blocksize - SECTOR_SHIFT;
+
+   /* get the device the curseg points to */
+   cs_block = START_BLOCK(sbi, curseg->segno) + curseg->next_blkoff;
+   for (i = 0; i < MAX_DEVICES; i++) {
+   if (!c.devices[i].path)
+   break;
+   if (c.devices[i].start_blkaddr <= cs_block &&
+   cs_block <= c.devices[i].end_blkaddr)
+   

Re: [f2fs-dev] [PATCH] fscrypt: support passing a keyring key to FS_IOC_ADD_ENCRYPTION_KEY

2019-11-13 Thread Eric Biggers
On Wed, Nov 06, 2019 at 04:12:59PM -0800, Eric Biggers wrote:
> From: Eric Biggers 
> 
> Extend the FS_IOC_ADD_ENCRYPTION_KEY ioctl to allow the raw key to be
> specified by a Linux keyring key, rather than specified directly.
> 
> This is useful because fscrypt keys belong to a particular filesystem
> instance, so they are destroyed when that filesystem is unmounted.
> Usually this is desired.  But in some cases, userspace may need to
> unmount and re-mount the filesystem while keeping the keys, e.g. during
> a system update.  This requires keeping the keys somewhere else too.
> 
> The keys could be kept in memory in a userspace daemon.  But depending
> on the security architecture and assumptions, it can be preferable to
> keep them only in kernel memory, where they are unreadable by userspace.
> 
> We also can't solve this by going back to the original fscrypt API
> (where for each file, the master key was looked up in the process's
> keyring hierarchy) because that caused lots of problems of its own.
> 
> Therefore, add the ability for FS_IOC_ADD_ENCRYPTION_KEY to accept a
> Linux keyring key.  This solves the problem by allowing userspace to (if
> needed) save the keys securely in a Linux keyring for re-provisioning,
> while still using the new fscrypt key management ioctls.
> 
> This is analogous to how dm-crypt accepts a Linux keyring key, but the
> key is then stored internally in the dm-crypt data structures rather
> than being looked up again each time the dm-crypt device is accessed.
> 
> Use a custom key type "fscrypt-provisioning" rather than one of the
> existing key types such as "logon".  This is strongly desired because it
> enforces that these keys are only usable for a particular purpose: for
> fscrypt as input to a particular KDF.  Otherwise, the keys could also be
> passed to any kernel API that accepts a "logon" key with any service
> prefix, e.g. dm-crypt, UBIFS, or (recently proposed) AF_ALG.  This would
> risk leaking information about the raw key despite it ostensibly being
> unreadable.  Of course, this mistake has already been made for multiple
> kernel APIs; but since this is a new API, let's do it right.
> 
> Signed-off-by: Eric Biggers 

David and Jarkko, are you okay with this patch from a keyrings subsystem
perspective?

- Eric


___
Linux-f2fs-devel mailing list
Linux-f2fs-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel


Re: [f2fs-dev] [man-pages RFC PATCH] statx.2: document STATX_ATTR_VERITY

2019-11-13 Thread Eric Biggers
On Sat, Nov 09, 2019 at 08:34:51PM +0100, walter harms wrote:
> Am 08.11.2019 20:35, schrieb Eric Biggers:
> > On Fri, Nov 08, 2019 at 09:23:04AM +0100, walter harms wrote:
> >>
> >>
> >> Am 07.11.2019 23:02, schrieb Eric Biggers:
> >>> From: Eric Biggers 
> >>>
> >>> Document the verity attribute for statx().
> >>>
> >>> Signed-off-by: Eric Biggers 
> >>> ---
> >>>  man2/statx.2 | 4 
> >>>  1 file changed, 4 insertions(+)
> >>>
> >>> RFC since the kernel patches are currently under review.
> >>> The kernel patches can be found here:
> >>> https://lkml.kernel.org/linux-fscrypt/20191029204141.145309-1-ebigg...@kernel.org/T/#u
> >>>
> >>> diff --git a/man2/statx.2 b/man2/statx.2
> >>> index d2f1b07b8..713bd1260 100644
> >>> --- a/man2/statx.2
> >>> +++ b/man2/statx.2
> >>> @@ -461,6 +461,10 @@ See
> >>>  .TP
> >>>  .B STATX_ATTR_ENCRYPTED
> >>>  A key is required for the file to be encrypted by the filesystem.
> >>> +.TP
> >>> +.B STATX_ATTR_VERITY
> >>> +The file has fs-verity enabled.  It cannot be written to, and all reads 
> >>> from it
> >>> +will be verified against a Merkle tree.
> >>
> >> Using "Merkle tree" opens a can of worm and what will happen when the 
> >> methode will change ?
> >> Does it matter at all ? i would suggest "filesystem" here.
> >>
> > 
> > Fundamentally, fs-verity guarantees that all data read is verified against a
> > cryptographic hash that covers the entire file.  I think it will be helpful 
> > to
> > convey that here, e.g. to avoid confusion with non-cryptographic, individual
> > block checksums supported by filesystems like btrfs and zfs.
> > 
> > Now, the only sane way to implement this model is with a Merkle tree, and 
> > this
> > is part of the fs-verity UAPI (via the file hash), so that's where I'm 
> > coming
> > from here.  Perhaps the phrase "Merkle tree" could be interpreted too 
> > strictly,
> > though, so it would be better to emphasize the more abstract model.  How 
> > about
> > the following?:
> > 
> > The file has fs-verity enabled.  It cannot be written to, and all reads
> > from it will be verified against a cryptographic hash that covers the
> > entire file, e.g. via a Merkle tree.
> > 
> 
> "feels" better,. but from a programmers perspective it is important at what 
> level
> this is actually done. To see my point look at the line before.
> "encrypted by the filesystem" mean i have to read the documentation of the fs 
> first
> so if encryption is supported at all. Or do i think to complicated ?
> 

It's filesystem-specific whether encryption and verity are supported.  I'm not
sure what your concern is, as statx() won't return the bits if the filesystem
doesn't support them.

Also note, if someone really wants the details about fscrypt and fsverity, they
really should read the documentation we maintain in the kernel tree [1][2].

[1] https://www.kernel.org/doc/html/latest/filesystems/fscrypt.html
[2] https://www.kernel.org/doc/html/latest/filesystems/fsverity.html

- Eric


___
Linux-f2fs-devel mailing list
Linux-f2fs-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel


Re: [f2fs-dev] [PATCH 0/4] statx: expose the fs-verity bit

2019-11-13 Thread Eric Biggers
On Tue, Oct 29, 2019 at 01:41:37PM -0700, Eric Biggers wrote:
> This patchset exposes the verity bit (a.k.a. FS_VERITY_FL) via statx().
> 
> This is useful because it allows applications to check whether a file is
> a verity file without opening it.  Opening a verity file can be
> expensive because the fsverity_info is set up on open, which involves
> parsing metadata and optionally verifying a cryptographic signature.
> 
> This is analogous to how various other bits are exposed through both
> FS_IOC_GETFLAGS and statx(), e.g. the encrypt bit.
> 
> This patchset applies to v5.4-rc5.
> 
> Eric Biggers (4):
>   statx: define STATX_ATTR_VERITY
>   ext4: support STATX_ATTR_VERITY
>   f2fs: support STATX_ATTR_VERITY
>   docs: fs-verity: mention statx() support
> 
>  Documentation/filesystems/fsverity.rst | 8 
>  fs/ext4/inode.c| 5 -
>  fs/f2fs/file.c | 5 -
>  include/linux/stat.h   | 3 ++-
>  include/uapi/linux/stat.h  | 2 +-
>  5 files changed, 19 insertions(+), 4 deletions(-)

Applied to fscrypt.git#fsverity for 5.5.

- Eric


___
Linux-f2fs-devel mailing list
Linux-f2fs-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel


Re: [f2fs-dev] [PATCH] docs: fs-verity: document first supported kernel version

2019-11-13 Thread Eric Biggers
On Wed, Oct 30, 2019 at 03:19:15PM -0700, Eric Biggers wrote:
> From: Eric Biggers 
> 
> I had meant to replace these TODOs with the actual version when applying
> the patches, but forgot to do so.  Do it now.
> 
> Signed-off-by: Eric Biggers 
> ---
>  Documentation/filesystems/fsverity.rst | 4 ++--
>  1 file changed, 2 insertions(+), 2 deletions(-)
> 
> diff --git a/Documentation/filesystems/fsverity.rst 
> b/Documentation/filesystems/fsverity.rst
> index 3355377a2439..a95536b6443c 100644
> --- a/Documentation/filesystems/fsverity.rst
> +++ b/Documentation/filesystems/fsverity.rst
> @@ -406,7 +406,7 @@ pages have been read into the pagecache.  (See `Verifying 
> data`_.)
>  ext4
>  
>  
> -ext4 supports fs-verity since Linux TODO and e2fsprogs v1.45.2.
> +ext4 supports fs-verity since Linux v5.4 and e2fsprogs v1.45.2.
>  
>  To create verity files on an ext4 filesystem, the filesystem must have
>  been formatted with ``-O verity`` or had ``tune2fs -O verity`` run on
> @@ -442,7 +442,7 @@ also only supports extent-based files.
>  f2fs
>  
>  
> -f2fs supports fs-verity since Linux TODO and f2fs-tools v1.11.0.
> +f2fs supports fs-verity since Linux v5.4 and f2fs-tools v1.11.0.
>  
>  To create verity files on an f2fs filesystem, the filesystem must have
>  been formatted with ``-O verity``.
> -- 
> 2.23.0
> 

Applied to fscrypt.git#fsverity for 5.5.

- Eric


___
Linux-f2fs-devel mailing list
Linux-f2fs-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel


Re: [f2fs-dev] [PATCH V2 2/2] fs: Move swap_[de]activate to file_operations

2019-11-13 Thread David Sterba
On Tue, Nov 12, 2019 at 04:42:44PM -0800, ira.we...@intel.com wrote:
> From: Ira Weiny 
> 
> swap_activate() and swap_deactivate() have nothing to do with address
> spaces.  We want to be able to change the address space operations on
> the fly to allow changing inode flags dynamically.
> 
> Switching address space operations can be difficult to do reliably.[1]
> Therefore, to simplify switching address space operations we reduce the
> number of functions in those operations by moving swap_activate() and
> swap_deactivate() out of the address space operations.
> 
> No functionality is changed with this patch.
> 
> This has been tested with XFS but not NFS, f2fs, or btrfs.
> 
> Also note we move some functions to facilitate compilation.  But no
> functional changes are contained within those diffs.
> 
> [1] https://lkml.org/lkml/2019/11/11/572
> 
> Cc: Dave Chinner 
> Cc: linux-fsde...@vger.kernel.org
> Cc: linux-ker...@vger.kernel.org
> Suggested-by: Jan Kara 
> Signed-off-by: Ira Weiny 
> 
> ---
> Changes from V0:
>   Update cover letter.
>   fix btrfs as per Andrew's comments
>   change xfs_iomap_swapfile_activate() to xfs_file_swap_activate()
> 
> Changes from V1:
>   Update recipients list
> 
> 
>  fs/btrfs/file.c| 341 +
>  fs/btrfs/inode.c   | 340 

For the btrfs part

Acked-by: David Sterba 

There's going to be a minor conflict with the current 5.5 queue; the
resolution is a simple rename of btrfs_block_group_cache to btrfs_block_group.


___
Linux-f2fs-devel mailing list
Linux-f2fs-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel


[f2fs-dev] [RFC PATCH v4] f2fs: support data compression

2019-11-13 Thread Chao Yu
This patch tries to support compression in f2fs.

- A new term, cluster, is defined as the basic unit of compression; a file can
be divided into multiple clusters logically. One cluster includes 4 << n
(n >= 0) logical pages, the compression size is also the cluster size, and
each cluster can be either compressed or not.

- In the cluster metadata layout, one special flag is used to indicate whether
a cluster is a compressed one or a normal one. For a compressed cluster, the
following metadata maps the cluster to [1, 4 << n - 1] physical blocks, where
f2fs stores data including the compress header and compressed data.

- In order to eliminate write amplification during overwrite, F2FS only
supports compression on write-once files; data can be compressed only when
all logical blocks in the file are valid and the cluster compress ratio is
lower than the specified threshold.

- To enable compression on regular inode, there are three ways:
* chattr +c file
* chattr +c dir; touch dir/file
* mount w/ -o compress_extension=ext; touch file.ext

Compress metadata layout:
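                             [Dnode Structure]
             +-----------------------------------------------+
             | cluster 1 | cluster 2 | ......... | cluster N |
             +-----------------------------------------------+
             .           .                       .           .
       .                       .                .                      .
  .         Compressed Cluster       .        .        Normal Cluster            .
+----------+---------+---------+---------+  +---------+---------+---------+---------+
|compr flag| block 1 | block 2 | block 3 |  | block 1 | block 2 | block 3 | block 4 |
+----------+---------+---------+---------+  +---------+---------+---------+---------+
           .                             .
         .                                           .
       .                                                           .
      +-------------+-------------+----------+----------------------------+
      | data length | data chksum | reserved |      compressed data       |
      +-------------+-------------+----------+----------------------------+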

Changelog:

20190326:
- fix error handling of read_end_io().
- remove unneeded comments in f2fs_encrypt_one_page().

20190327:
- fix wrong use of f2fs_cluster_is_full() in f2fs_mpage_readpages().
- don't jump into loop directly to avoid uninitialized variables.
- add TODO tag in error path of f2fs_write_cache_pages().

20190328:
- fix wrong merge condition in f2fs_read_multi_pages().
- check compressed file in f2fs_post_read_required().

20190401
- allow overwrite on non-compressed cluster.
- check cluster meta before writing compressed data.

20190402
- don't preallocate blocks for compressed file.

- add lz4 compress algorithm
- process multiple post read works in one workqueue
  Now f2fs supports processing post read work in multiple workqueues,
  but it shows low performance due to the scheduling overhead of multiple
  workqueues executing in order.

20190921
- compress: support buffered overwrite
C: compress cluster flag
V: valid block address
N: NEW_ADDR

One cluster contains 4 blocks

 before overwrite   after overwrite

- VVVV          ->      CVNN
- CVNN          ->      VVVV

- CVNN          ->      CVNN
- CVNN          ->      CVVV

- CVVV          ->      CVNN
- CVVV          ->      CVVV

20191029
- add kconfig F2FS_FS_COMPRESSION to isolate compression related
code, add kconfig F2FS_FS_{LZO,LZ4} to cover the backend algorithms.
Note: the lzo backend will be removed if Jaegeuk agrees to that too.
- update code according to Eric's comments.

20191101
- apply fixes from Jaegeuk

20191113
- apply fixes from Jaegeuk
- split workqueue for fsverity

[Jaegeuk Kim]
- add tracepoint for f2fs_{,de}compress_pages()
- fix many bugs and add some compression stats

Signed-off-by: Chao Yu 
Signed-off-by: Jaegeuk Kim 
---
 Documentation/filesystems/f2fs.txt |   52 ++
 fs/f2fs/Kconfig|   23 +
 fs/f2fs/Makefile   |1 +
 fs/f2fs/compress.c | 1115 
 fs/f2fs/data.c |  611 +--
 fs/f2fs/debug.c|6 +
 fs/f2fs/f2fs.h |  245 +-
 fs/f2fs/file.c |  191 -
 fs/f2fs/inode.c|   43 ++
 fs/f2fs/namei.c|   59 ++
 fs/f2fs/segment.c  |5 +-
 fs/f2fs/segment.h  |   12 -
 fs/f2fs/super.c|  112 ++-
 fs/f2fs/sysfs.c|7 +
 include/linux/f2fs_fs.h|   11 +
 include/trace/events/f2fs.h|   99 +++
 16 files changed, 2478 insertions(+), 114 deletions(-)
 create mode 100644 fs/f2fs/compress.c

diff --git a/Documentation/filesystems/f2fs.txt 
b/Documentation/filesystems/f2fs.txt
index 29020af0cff9..228af93f850b 100644
--- a/Documentation/filesystems/f2fs.txt
+++ b/Documentation/filesystems/f2fs.txt
@@ -235,6 +235,17

Re: [f2fs-dev] [PATCH 2/2] f2fs: support data compression

2019-11-13 Thread Chao Yu
Hi Jaegeuk,

I've split the workqueue for fsverity; please test compression based on the
last patch.

With the F2FS_FS_COMPRESSION config disabled, it looks like all verity
testcases pass; I will do more tests for the compress/encrypt/fsverity
combination later.

The diff is as below, code base is last g-dev-test branch:

>From 5b51682bc3013b8de6dee4906865181c3ded435f Mon Sep 17 00:00:00 2001
From: Chao Yu 
Date: Tue, 12 Nov 2019 10:03:21 +0800
Subject: [PATCH INCREMENT] f2fs: compress: split workqueue for fsverity

Signed-off-by: Chao Yu 
---
 fs/f2fs/compress.c | 16 +---
 fs/f2fs/data.c | 94 +++---
 fs/f2fs/f2fs.h |  2 +-
 3 files changed, 84 insertions(+), 28 deletions(-)

diff --git a/fs/f2fs/compress.c b/fs/f2fs/compress.c
index f4ce825f12b4..254275325890 100644
--- a/fs/f2fs/compress.c
+++ b/fs/f2fs/compress.c
@@ -377,7 +377,7 @@ void f2fs_decompress_pages(struct bio *bio, struct page 
*page, bool verity)

dec_page_count(sbi, F2FS_RD_DATA);

-   if (bio->bi_status)
+   if (bio->bi_status || PageError(page))
dic->failed = true;

if (refcount_dec_not_one(>ref))
@@ -419,10 +419,14 @@ void f2fs_decompress_pages(struct bio *bio, struct page 
*page, bool verity)
 out_vunmap_rbuf:
vunmap(dic->rbuf);
 out_free_dic:
-   f2fs_set_cluster_uptodate(dic->rpages, dic->cluster_size, ret, verity);
+   if (!verity)
+   f2fs_decompress_end_io(dic->rpages, dic->cluster_size,
+   ret, false);
+
trace_f2fs_decompress_pages_end(dic->inode, dic->cluster_idx,
-   dic->clen, ret);
-   f2fs_free_dic(dic);
+   dic->clen, ret);
+   if (!verity)
+   f2fs_free_dic(dic);
 }

 static bool is_page_in_cluster(struct compress_ctx *cc, pgoff_t index)
@@ -1086,7 +1090,7 @@ void f2fs_free_dic(struct decompress_io_ctx *dic)
kfree(dic);
 }

-void f2fs_set_cluster_uptodate(struct page **rpages,
+void f2fs_decompress_end_io(struct page **rpages,
unsigned int cluster_size, bool err, bool verity)
 {
int i;
@@ -1108,4 +1112,4 @@ void f2fs_set_cluster_uptodate(struct page **rpages,
}
unlock_page(rpage);
}
-}
+}
\ No newline at end of file
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index c9362a53f8a1..2d64c6ffee84 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -98,7 +98,7 @@ static void __read_end_io(struct bio *bio, bool compr, bool 
verity)
page = bv->bv_page;

 #ifdef CONFIG_F2FS_FS_COMPRESSION
-   if (compr && PagePrivate(page)) {
+   if (compr && f2fs_is_compressed_page(page)) {
f2fs_decompress_pages(bio, page, verity);
continue;
}
@@ -115,9 +115,14 @@ static void __read_end_io(struct bio *bio, bool compr, 
bool verity)
dec_page_count(F2FS_P_SB(page), __read_io_type(page));
unlock_page(page);
}
-   if (bio->bi_private)
-   mempool_free(bio->bi_private, bio_post_read_ctx_pool);
-   bio_put(bio);
+}
+
+static void f2fs_release_read_bio(struct bio *bio);
+static void __f2fs_read_end_io(struct bio *bio, bool compr, bool verity)
+{
+   if (!compr)
+   __read_end_io(bio, false, verity);
+   f2fs_release_read_bio(bio);
 }

 static void f2fs_decompress_bio(struct bio *bio, bool verity)
@@ -127,19 +132,50 @@ static void f2fs_decompress_bio(struct bio *bio, bool 
verity)

 static void bio_post_read_processing(struct bio_post_read_ctx *ctx);

-static void decrypt_work(struct bio_post_read_ctx *ctx)
+static void f2fs_decrypt_work(struct bio_post_read_ctx *ctx)
 {
fscrypt_decrypt_bio(ctx->bio);
 }

-static void decompress_work(struct bio_post_read_ctx *ctx, bool verity)
+static void f2fs_decompress_work(struct bio_post_read_ctx *ctx)
+{
+   f2fs_decompress_bio(ctx->bio, ctx->enabled_steps & (1 << STEP_VERITY));
+}
+
+#ifdef CONFIG_F2FS_FS_COMPRESSION
+void f2fs_verify_pages(struct page **rpages, unsigned int cluster_size)
 {
-   f2fs_decompress_bio(ctx->bio, verity);
+   f2fs_decompress_end_io(rpages, cluster_size, false, true);
 }

-static void verity_work(struct bio_post_read_ctx *ctx)
+static void f2fs_verify_bio(struct bio *bio)
 {
+   struct page *page = bio_first_page_all(bio);
+   struct decompress_io_ctx *dic =
+   (struct decompress_io_ctx *)page_private(page);
+
+   f2fs_verify_pages(dic->rpages, dic->cluster_size);
+   f2fs_free_dic(dic);
+}
+#endif
+
+static void f2fs_verity_work(struct work_struct *work)
+{
+   struct bio_post_read_ctx *ctx =
+   container_of(work, struct bio_post_read_ctx, work);
+
+#ifdef CONFIG_F2FS_FS_COMPRESSION
+   /* previous step is decompression */
+   if (ctx->enabled_steps & 

[f2fs-dev] [PATCH] f2fs: Fix deadlock in f2fs_gc() context during atomic files handling

2019-11-13 Thread Sahitya Tummala
The FS got stuck in the stack below when the storage was in an almost
full/dirty condition (when FG_GC was being done).

schedule_timeout
io_schedule_timeout
congestion_wait
f2fs_drop_inmem_pages_all
f2fs_gc
f2fs_balance_fs
__write_node_page
f2fs_fsync_node_pages
f2fs_do_sync_file
f2fs_ioctl

The root cause of this issue is a potential infinite loop
in f2fs_drop_inmem_pages_all() for the case where gc_failure is true
and there is an inode whose i_gc_failures[GC_FAILURE_ATOMIC] is
not set. Fix this by keeping track of the total number of atomic files
currently opened and using that to exit from this condition.

Fix-suggested-by: Chao Yu 
Signed-off-by: Chao Yu 
Signed-off-by: Sahitya Tummala 
---
v2:
- change fix as per Chao's suggestion
- decrement sbi->atomic_files protected under sbi->inode_lock[ATOMIC_FILE] and
  only when atomic flag is cleared for the first time, otherwise, the count
  goes to an invalid/high value as f2fs_drop_inmem_pages() can be called from
  two contexts at the same time.

 fs/f2fs/f2fs.h|  1 +
 fs/f2fs/file.c|  1 +
 fs/f2fs/segment.c | 21 +++--
 3 files changed, 17 insertions(+), 6 deletions(-)

diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index c681f51..e04a665 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -1297,6 +1297,7 @@ struct f2fs_sb_info {
unsigned int gc_mode;   /* current GC state */
unsigned int next_victim_seg[2];/* next segment in victim 
section */
/* for skip statistic */
+   unsigned int atomic_files;  /* # of opened atomic file */
unsigned long long skipped_atomic_files[2]; /* FG_GC and BG_GC */
unsigned long long skipped_gc_rwsem;/* FG_GC only */
 
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index f6c038e..22c4949 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -1919,6 +1919,7 @@ static int f2fs_ioc_start_atomic_write(struct file *filp)
spin_lock(>inode_lock[ATOMIC_FILE]);
if (list_empty(>inmem_ilist))
list_add_tail(>inmem_ilist, >inode_list[ATOMIC_FILE]);
+   sbi->atomic_files++;
spin_unlock(>inode_lock[ATOMIC_FILE]);
 
/* add inode in inmem_list first and set atomic_file */
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index da830fc..0b7a33b 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -288,6 +288,8 @@ void f2fs_drop_inmem_pages_all(struct f2fs_sb_info *sbi, 
bool gc_failure)
struct list_head *head = >inode_list[ATOMIC_FILE];
struct inode *inode;
struct f2fs_inode_info *fi;
+   unsigned int count = sbi->atomic_files;
+   unsigned int looped = 0;
 next:
spin_lock(>inode_lock[ATOMIC_FILE]);
if (list_empty(head)) {
@@ -296,22 +298,26 @@ void f2fs_drop_inmem_pages_all(struct f2fs_sb_info *sbi, 
bool gc_failure)
}
fi = list_first_entry(head, struct f2fs_inode_info, inmem_ilist);
inode = igrab(>vfs_inode);
+   if (inode)
+   list_move_tail(>inmem_ilist, head);
spin_unlock(>inode_lock[ATOMIC_FILE]);
 
if (inode) {
if (gc_failure) {
-   if (fi->i_gc_failures[GC_FAILURE_ATOMIC])
-   goto drop;
-   goto skip;
+   if (!fi->i_gc_failures[GC_FAILURE_ATOMIC])
+   goto skip;
}
-drop:
set_inode_flag(inode, FI_ATOMIC_REVOKE_REQUEST);
f2fs_drop_inmem_pages(inode);
+skip:
iput(inode);
}
-skip:
congestion_wait(BLK_RW_ASYNC, HZ/50);
cond_resched();
+   if (gc_failure) {
+   if (++looped >= count)
+   return;
+   }
goto next;
 }
 
@@ -327,13 +333,16 @@ void f2fs_drop_inmem_pages(struct inode *inode)
mutex_unlock(>inmem_lock);
}
 
-   clear_inode_flag(inode, FI_ATOMIC_FILE);
fi->i_gc_failures[GC_FAILURE_ATOMIC] = 0;
stat_dec_atomic_write(inode);
 
spin_lock(>inode_lock[ATOMIC_FILE]);
if (!list_empty(>inmem_ilist))
list_del_init(>inmem_ilist);
+   if (f2fs_is_atomic_file(inode)) {
+   clear_inode_flag(inode, FI_ATOMIC_FILE);
+   sbi->atomic_files--;
+   }
spin_unlock(>inode_lock[ATOMIC_FILE]);
 }
 
-- 
Qualcomm India Private Limited, on behalf of Qualcomm Innovation Center, Inc.
Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum, a Linux 
Foundation Collaborative Project.



___
Linux-f2fs-devel mailing list
Linux-f2fs-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel