[f2fs-dev] [PATCH] f2fs: call unlock_new_inode() before d_instantiate()

2018-04-18 Thread Eric Biggers
From: Eric Biggers 

xfstest generic/429 sometimes hangs on f2fs, caused by a thread being
unable to take a directory's i_rwsem for write in vfs_rmdir().  In the
test, one thread repeatedly creates and removes a directory, and other
threads repeatedly look up a file in the directory.  The bug is that
f2fs_mkdir() calls d_instantiate() before unlock_new_inode(), resulting
in the directory inode being exposed to lookups before it has been fully
initialized.  And with CONFIG_DEBUG_LOCK_ALLOC, unlock_new_inode()
reinitializes ->i_rwsem, corrupting its state when it is already held.

Fix it by calling unlock_new_inode() before d_instantiate().  This
matches what other filesystems do.

Fixes: 57397d86c62d ("f2fs: add inode operations for special inodes")
Signed-off-by: Eric Biggers 
---
 fs/f2fs/namei.c | 8 
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c
index d5098efe577c..3a7ed962d2f7 100644
--- a/fs/f2fs/namei.c
+++ b/fs/f2fs/namei.c
@@ -294,8 +294,8 @@ static int f2fs_create(struct inode *dir, struct dentry 
*dentry, umode_t mode,
 
alloc_nid_done(sbi, ino);
 
-   d_instantiate(dentry, inode);
unlock_new_inode(inode);
+   d_instantiate(dentry, inode);
 
if (IS_DIRSYNC(dir))
f2fs_sync_fs(sbi->sb, 1);
@@ -597,8 +597,8 @@ static int f2fs_symlink(struct inode *dir, struct dentry 
*dentry,
err = page_symlink(inode, disk_link.name, disk_link.len);
 
 err_out:
-   d_instantiate(dentry, inode);
unlock_new_inode(inode);
+   d_instantiate(dentry, inode);
 
/*
 * Let's flush symlink data in order to avoid broken symlink as much as
@@ -661,8 +661,8 @@ static int f2fs_mkdir(struct inode *dir, struct dentry 
*dentry, umode_t mode)
 
alloc_nid_done(sbi, inode->i_ino);
 
-   d_instantiate(dentry, inode);
unlock_new_inode(inode);
+   d_instantiate(dentry, inode);
 
if (IS_DIRSYNC(dir))
f2fs_sync_fs(sbi->sb, 1);
@@ -713,8 +713,8 @@ static int f2fs_mknod(struct inode *dir, struct dentry 
*dentry,
 
alloc_nid_done(sbi, inode->i_ino);
 
-   d_instantiate(dentry, inode);
unlock_new_inode(inode);
+   d_instantiate(dentry, inode);
 
if (IS_DIRSYNC(dir))
f2fs_sync_fs(sbi->sb, 1);
-- 
2.17.0.484.g0c8726318c-goog


--
Check out the vibrant tech community on one of the world's most
engaging tech sites, Slashdot.org! http://sdm.link/slashdot
___
Linux-f2fs-devel mailing list
Linux-f2fs-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel


[f2fs-dev] [PATCH v2 1/2] fscrypt: allow synchronous bio decryption

2018-04-18 Thread Eric Biggers via Linux-f2fs-devel
Currently, fscrypt provides fscrypt_decrypt_bio_pages() which decrypts a
bio's pages asynchronously, then unlocks them afterwards.  But, this
assumes that decryption is the last "postprocessing step" for the bio,
so it's incompatible with additional postprocessing steps such as
authenticity verification after decryption.

Therefore, rename the existing fscrypt_decrypt_bio_pages() to
fscrypt_enqueue_decrypt_bio().  Then, add fscrypt_decrypt_bio() which
decrypts the pages in the bio synchronously without unlocking the pages,
nor setting them Uptodate; and add fscrypt_enqueue_decrypt_work(), which
enqueues work on the fscrypt_read_workqueue.  The new functions will be
used by filesystems that support both fscrypt and fs-verity.

Signed-off-by: Eric Biggers 
---
 fs/crypto/bio.c | 35 +
 fs/crypto/crypto.c  |  8 +++-
 fs/crypto/fscrypt_private.h |  1 -
 fs/ext4/readpage.c  |  2 +-
 fs/f2fs/data.c  |  2 +-
 include/linux/fscrypt_notsupp.h | 13 +---
 include/linux/fscrypt_supp.h|  5 -
 7 files changed, 45 insertions(+), 21 deletions(-)

diff --git a/fs/crypto/bio.c b/fs/crypto/bio.c
index 0d5e6a569d58..0959044c5cee 100644
--- a/fs/crypto/bio.c
+++ b/fs/crypto/bio.c
@@ -26,15 +26,8 @@
 #include 
 #include "fscrypt_private.h"
 
-/*
- * Call fscrypt_decrypt_page on every single page, reusing the encryption
- * context.
- */
-static void completion_pages(struct work_struct *work)
+static void __fscrypt_decrypt_bio(struct bio *bio, bool done)
 {
-   struct fscrypt_ctx *ctx =
-   container_of(work, struct fscrypt_ctx, r.work);
-   struct bio *bio = ctx->r.bio;
struct bio_vec *bv;
int i;
 
@@ -46,22 +39,38 @@ static void completion_pages(struct work_struct *work)
if (ret) {
WARN_ON_ONCE(1);
SetPageError(page);
-   } else {
+   } else if (done) {
SetPageUptodate(page);
}
-   unlock_page(page);
+   if (done)
+   unlock_page(page);
}
+}
+
+void fscrypt_decrypt_bio(struct bio *bio)
+{
+   __fscrypt_decrypt_bio(bio, false);
+}
+EXPORT_SYMBOL(fscrypt_decrypt_bio);
+
+static void completion_pages(struct work_struct *work)
+{
+   struct fscrypt_ctx *ctx =
+   container_of(work, struct fscrypt_ctx, r.work);
+   struct bio *bio = ctx->r.bio;
+
+   __fscrypt_decrypt_bio(bio, true);
fscrypt_release_ctx(ctx);
bio_put(bio);
 }
 
-void fscrypt_decrypt_bio_pages(struct fscrypt_ctx *ctx, struct bio *bio)
+void fscrypt_enqueue_decrypt_bio(struct fscrypt_ctx *ctx, struct bio *bio)
 {
 	INIT_WORK(&ctx->r.work, completion_pages);
ctx->r.bio = bio;
-	queue_work(fscrypt_read_workqueue, &ctx->r.work);
+	fscrypt_enqueue_decrypt_work(&ctx->r.work);
 }
-EXPORT_SYMBOL(fscrypt_decrypt_bio_pages);
+EXPORT_SYMBOL(fscrypt_enqueue_decrypt_bio);
 
 void fscrypt_pullback_bio_page(struct page **page, bool restore)
 {
diff --git a/fs/crypto/crypto.c b/fs/crypto/crypto.c
index ce654526c0fb..0758d32ad01b 100644
--- a/fs/crypto/crypto.c
+++ b/fs/crypto/crypto.c
@@ -45,12 +45,18 @@ static mempool_t *fscrypt_bounce_page_pool = NULL;
 static LIST_HEAD(fscrypt_free_ctxs);
 static DEFINE_SPINLOCK(fscrypt_ctx_lock);
 
-struct workqueue_struct *fscrypt_read_workqueue;
+static struct workqueue_struct *fscrypt_read_workqueue;
 static DEFINE_MUTEX(fscrypt_init_mutex);
 
 static struct kmem_cache *fscrypt_ctx_cachep;
 struct kmem_cache *fscrypt_info_cachep;
 
+void fscrypt_enqueue_decrypt_work(struct work_struct *work)
+{
+   queue_work(fscrypt_read_workqueue, work);
+}
+EXPORT_SYMBOL(fscrypt_enqueue_decrypt_work);
+
 /**
  * fscrypt_release_ctx() - Releases an encryption context
  * @ctx: The encryption context to release.
diff --git a/fs/crypto/fscrypt_private.h b/fs/crypto/fscrypt_private.h
index ad6722bae8b7..4012558f6115 100644
--- a/fs/crypto/fscrypt_private.h
+++ b/fs/crypto/fscrypt_private.h
@@ -97,7 +97,6 @@ static inline bool fscrypt_valid_enc_modes(u32 contents_mode,
 /* crypto.c */
 extern struct kmem_cache *fscrypt_info_cachep;
 extern int fscrypt_initialize(unsigned int cop_flags);
-extern struct workqueue_struct *fscrypt_read_workqueue;
 extern int fscrypt_do_page_crypto(const struct inode *inode,
  fscrypt_direction_t rw, u64 lblk_num,
  struct page *src_page,
diff --git a/fs/ext4/readpage.c b/fs/ext4/readpage.c
index 9ffa6fad18db..19b87a8de6ff 100644
--- a/fs/ext4/readpage.c
+++ b/fs/ext4/readpage.c
@@ -77,7 +77,7 @@ static void mpage_end_io(struct bio *bio)
if (bio->bi_status) {
fscrypt_release_ctx(bio->bi_private);
} else {
-   fscrypt_decrypt_bio_pages(bio->bi_private, bio);
+   

[f2fs-dev] [PATCH v2 2/2] f2fs: refactor read path to allow multiple postprocessing steps

2018-04-18 Thread Eric Biggers via Linux-f2fs-devel
Currently f2fs's ->readpage() and ->readpages() assume that either the
data undergoes no postprocessing, or decryption only.  But with
fs-verity, there will be an additional authenticity verification step,
and it may be needed either by itself, or combined with decryption.

To support this, store a 'struct bio_post_read_ctx' in ->bi_private
which contains a work struct, a bitmask of postprocessing steps that are
enabled, and an indicator of the current step.  The bio completion
routine, if there was no I/O error, enqueues the first postprocessing
step.  When that completes, it continues to the next step.  Pages that
fail any postprocessing step have PageError set.  Once all steps have
completed, pages without PageError set are set Uptodate, and all pages
are unlocked.

Also replace f2fs_encrypted_file() with a new function
f2fs_post_read_required() in places like direct I/O and garbage
collection that really should be testing whether the file needs special
I/O processing, not whether it is encrypted specifically.

This may also be useful for other future f2fs features such as
compression.

Signed-off-by: Eric Biggers 
---
 fs/f2fs/data.c   | 166 +++
 fs/f2fs/f2fs.h   |  12 +++-
 fs/f2fs/file.c   |   4 +-
 fs/f2fs/gc.c |   6 +-
 fs/f2fs/inline.c |   2 +-
 fs/f2fs/super.c  |   6 ++
 6 files changed, 147 insertions(+), 49 deletions(-)

diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index 39225519de1c..ad503603ceec 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -30,6 +30,11 @@
 #include "trace.h"
 #include 
 
+#define NUM_PREALLOC_POST_READ_CTXS128
+
+static struct kmem_cache *bio_post_read_ctx_cache;
+static mempool_t *bio_post_read_ctx_pool;
+
 static bool __is_cp_guaranteed(struct page *page)
 {
struct address_space *mapping = page->mapping;
@@ -50,11 +55,77 @@ static bool __is_cp_guaranteed(struct page *page)
return false;
 }
 
-static void f2fs_read_end_io(struct bio *bio)
+/* postprocessing steps for read bios */
+enum bio_post_read_step {
+   STEP_INITIAL = 0,
+   STEP_DECRYPT,
+};
+
+struct bio_post_read_ctx {
+   struct bio *bio;
+   struct work_struct work;
+   unsigned int cur_step;
+   unsigned int enabled_steps;
+};
+
+static void __read_end_io(struct bio *bio)
 {
-   struct bio_vec *bvec;
+   struct page *page;
+   struct bio_vec *bv;
int i;
 
+   bio_for_each_segment_all(bv, bio, i) {
+   page = bv->bv_page;
+
+   /* PG_error was set if any post_read step failed */
+   if (bio->bi_status || PageError(page)) {
+   ClearPageUptodate(page);
+   SetPageError(page);
+   } else {
+   SetPageUptodate(page);
+   }
+   unlock_page(page);
+   }
+   if (bio->bi_private)
+   mempool_free(bio->bi_private, bio_post_read_ctx_pool);
+   bio_put(bio);
+}
+
+static void bio_post_read_processing(struct bio_post_read_ctx *ctx);
+
+static void decrypt_work(struct work_struct *work)
+{
+   struct bio_post_read_ctx *ctx =
+   container_of(work, struct bio_post_read_ctx, work);
+
+   fscrypt_decrypt_bio(ctx->bio);
+
+   bio_post_read_processing(ctx);
+}
+
+static void bio_post_read_processing(struct bio_post_read_ctx *ctx)
+{
+   switch (++ctx->cur_step) {
+   case STEP_DECRYPT:
+   if (ctx->enabled_steps & (1 << STEP_DECRYPT)) {
+			INIT_WORK(&ctx->work, decrypt_work);
+			fscrypt_enqueue_decrypt_work(&ctx->work);
+   return;
+   }
+   ctx->cur_step++;
+   /* fall-through */
+   default:
+   __read_end_io(ctx->bio);
+   }
+}
+
+static bool f2fs_bio_post_read_required(struct bio *bio)
+{
+   return bio->bi_private && !bio->bi_status;
+}
+
+static void f2fs_read_end_io(struct bio *bio)
+{
 #ifdef CONFIG_F2FS_FAULT_INJECTION
if (time_to_inject(F2FS_P_SB(bio_first_page_all(bio)), FAULT_IO)) {
f2fs_show_injection_info(FAULT_IO);
@@ -62,28 +133,15 @@ static void f2fs_read_end_io(struct bio *bio)
}
 #endif
 
-   if (f2fs_bio_encrypted(bio)) {
-   if (bio->bi_status) {
-   fscrypt_release_ctx(bio->bi_private);
-   } else {
-   fscrypt_enqueue_decrypt_bio(bio->bi_private, bio);
-   return;
-   }
-   }
-
-   bio_for_each_segment_all(bvec, bio, i) {
-   struct page *page = bvec->bv_page;
+   if (f2fs_bio_post_read_required(bio)) {
+   struct bio_post_read_ctx *ctx = bio->bi_private;
 
-   if (!bio->bi_status) {
-   if (!PageUptodate(page))
-   SetPageUptodate(page);
-   } else {
-   ClearPageUptodate(page);
-   

[f2fs-dev] [PATCH v2 0/2] fscrypt / f2fs: prepare I/O path for fs-verity

2018-04-18 Thread Eric Biggers via Linux-f2fs-devel
Hello,

These two patches restructure f2fs's read path to allow the data to go
through multiple postprocessing steps, rather than just decryption as is
implemented currently.  This is mainly in preparation for doing
authenticity verification of data via fs-verity, though this change
might also be useful for other future f2fs features, e.g. compression.

These patches don't yet add the fs-verity work, however, as it depends
on the rest of the fs-verity patchset.  I'm planning to send the full
patchset out as an RFC, but some parts need further investigation first.
(The work-in-progress version can be found at
 git://git.kernel.org/pub/scm/linux/kernel/git/mhalcrow/linux.git,
 branch "fs-verity-dev".)

Changed since v1:

- Define NUM_PREALLOC_POST_READ_CTXS

Eric Biggers (2):
  fscrypt: allow synchronous bio decryption
  f2fs: refactor read path to allow multiple postprocessing steps

 fs/crypto/bio.c |  35 ---
 fs/crypto/crypto.c  |   8 +-
 fs/crypto/fscrypt_private.h |   1 -
 fs/ext4/readpage.c  |   2 +-
 fs/f2fs/data.c  | 166 
 fs/f2fs/f2fs.h  |  12 ++-
 fs/f2fs/file.c  |   4 +-
 fs/f2fs/gc.c|   6 +-
 fs/f2fs/inline.c|   2 +-
 fs/f2fs/super.c |   6 ++
 include/linux/fscrypt_notsupp.h |  13 ++-
 include/linux/fscrypt_supp.h|   5 +-
 12 files changed, 191 insertions(+), 69 deletions(-)

-- 
2.17.0.484.g0c8726318c-goog


--
Check out the vibrant tech community on one of the world's most
engaging tech sites, Slashdot.org! http://sdm.link/slashdot
___
Linux-f2fs-devel mailing list
Linux-f2fs-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel


Re: [f2fs-dev] [PATCH 2/2] f2fs: refactor read path to allow multiple postprocessing steps

2018-04-18 Thread Jaegeuk Kim
On 04/18, Eric Biggers wrote:
> Hi Chao,
> 
> On Wed, Apr 18, 2018 at 02:27:32PM +0800, Chao Yu wrote:
> > Hi Eric,
> > 
> > On 2018/4/18 1:42, Eric Biggers wrote:
> > > Hi Chao,
> > > 
> > > On Tue, Apr 17, 2018 at 05:13:12PM +0800, Chao Yu wrote:
> > >>> +
> > >>> +static void bio_post_read_processing(struct bio_post_read_ctx *ctx);
> > >>> +
> > >>> +static void decrypt_work(struct work_struct *work)
> > >>> +{
> > >>> +   struct bio_post_read_ctx *ctx =
> > >>> +   container_of(work, struct bio_post_read_ctx, work);
> > >>> +
> > >>> +   fscrypt_decrypt_bio(ctx->bio);
> > >>> +
> > >>> +   bio_post_read_processing(ctx);
> > >>> +}
> > >>> +
> > >>> +static void bio_post_read_processing(struct bio_post_read_ctx *ctx)
> > >>> +{
> > >>> +   switch (++ctx->cur_step) {
> > >>> +   case STEP_DECRYPT:
> > >>> +   if (ctx->enabled_steps & (1 << STEP_DECRYPT)) {
> > >>> +   INIT_WORK(&ctx->work, decrypt_work);
> > >>> +   fscrypt_enqueue_decrypt_work(&ctx->work);
> > >>> +   return;
> > >>> +   }
> > >>> +   ctx->cur_step++;
> > >>> +   /* fall-through */
> > >>> +   default:
> > >>> +   __read_end_io(ctx->bio);
> > >>> +   }
> > >>
> > >> How about introducing __bio_post_read_processing()
> > >>
> > >> switch (step) {
> > >> case STEP_DECRYPT:
> > >>  ...
> > >>  break;
> > >> case STEP_COMPRESS:
> > >>  ...
> > >>  break;
> > >> case STEP_GENERIC:
> > >>  __read_end_io;
> > >>  break;
> > >> ...
> > >> }
> > >>
> > >> Then we can customize flexible read processes like:
> > >>
> > >> bio_post_read_processing()
> > >> {
> > >>  if (encrypt_enabled)
> > >>  __bio_post_read_processing(, STEP_DECRYPT);
> > >>  if (compress_enabled)
> > >>  __bio_post_read_processing(, STEP_COMPRESS);
> > >>  __bio_post_read_processing(, STEP_GENERIC);
> > >> }
> > >>
> > >> Or other flow.
> > > 
> > > If I understand correctly, you're suggesting that all the steps be done 
> > > in a
> > > single workqueue item?  The problem with that is that the verity work will
> > 
> > Yup,
> > 
> > > require I/O to the file to read hashes, which may need STEP_DECRYPT.  
> > > Hence,
> > > decryption and verity will need separate workqueues.
> > 
> > For decryption and verity, the needs separated data, I agree that we can not
> > merge the work into one workqueue.
> > 
> > As you mentioned in commit message, it can be used by compression later, so 
> > I
> > just thought that for decryption and decompression, maybe we can do those 
> > work
> > sequentially in one workqueue?
> > 
> 
> Sure.  I'm not sure what you're asking me to do, though, since f2fs 
> compression
> doesn't exist yet.  If/when there are multiple steps that can be combined, 
> then
> bio_post_read_processing() can be updated to schedule them together.

Indeed, we may need to consolidate many workqueues into one later, not at this
time, IMO.

> 
> > > 
> > >>> @@ -481,29 +537,33 @@ static struct bio *f2fs_grab_read_bio(struct 
> > >>> inode *inode, block_t blkaddr,
> > >>>  unsigned 
> > >>> nr_pages)
> > >>>  {
> > >>> struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
> > >>> -   struct fscrypt_ctx *ctx = NULL;
> > >>> struct bio *bio;
> > >>> -
> > >>> -   if (f2fs_encrypted_file(inode)) {
> > >>> -   ctx = fscrypt_get_ctx(inode, GFP_NOFS);
> > >>> -   if (IS_ERR(ctx))
> > >>> -   return ERR_CAST(ctx);
> > >>> -
> > >>> -   /* wait the page to be moved by cleaning */
> > >>> -   f2fs_wait_on_block_writeback(sbi, blkaddr);
> > >>> -   }
> > >>> +   struct bio_post_read_ctx *ctx;
> > >>> +   unsigned int post_read_steps = 0;
> > >>>  
> > >>> bio = f2fs_bio_alloc(sbi, min_t(int, nr_pages, BIO_MAX_PAGES), 
> > >>> false);
> > >>> -   if (!bio) {
> > >>> -   if (ctx)
> > >>> -   fscrypt_release_ctx(ctx);
> > >>> +   if (!bio)
> > >>> return ERR_PTR(-ENOMEM);
> > >>> -   }
> > >>> f2fs_target_device(sbi, blkaddr, bio);
> > >>> bio->bi_end_io = f2fs_read_end_io;
> > >>> -   bio->bi_private = ctx;
> > >>
> > >> bio->bi_private = NULL;
> > >>
> > > 
> > > I don't see why.  ->bi_private is NULL by default.
> > 
> > As we will check bi_private in read_end_io anyway, if it is not NULL, we 
> > will
> > parse it as an ctx, am I missing something?
> > 
> 
> We're allocating a new bio.  New bios have NULL ->bi_private.

+1.

bio_init() does memset();

> 
> > Thanks,
> > 
> > > 
> > >>> +   bio_post_read_ctx_pool =
> > >>> +   mempool_create_slab_pool(128, bio_post_read_ctx_cache);
> > >>
> > >> #define MAX_POST_READ_CACHE_SIZE 128
> > >>
> > > 
> > > Yes, that makes sense.
> > > 
> 
> Actually it's the number of contexts preallocated in the mempool, so I'm going
> to 

Re: [f2fs-dev] [PATCH 2/2] f2fs: refactor read path to allow multiple postprocessing steps

2018-04-18 Thread Eric Biggers via Linux-f2fs-devel
Hi Chao,

On Wed, Apr 18, 2018 at 02:27:32PM +0800, Chao Yu wrote:
> Hi Eric,
> 
> On 2018/4/18 1:42, Eric Biggers wrote:
> > Hi Chao,
> > 
> > On Tue, Apr 17, 2018 at 05:13:12PM +0800, Chao Yu wrote:
> >>> +
> >>> +static void bio_post_read_processing(struct bio_post_read_ctx *ctx);
> >>> +
> >>> +static void decrypt_work(struct work_struct *work)
> >>> +{
> >>> + struct bio_post_read_ctx *ctx =
> >>> + container_of(work, struct bio_post_read_ctx, work);
> >>> +
> >>> + fscrypt_decrypt_bio(ctx->bio);
> >>> +
> >>> + bio_post_read_processing(ctx);
> >>> +}
> >>> +
> >>> +static void bio_post_read_processing(struct bio_post_read_ctx *ctx)
> >>> +{
> >>> + switch (++ctx->cur_step) {
> >>> + case STEP_DECRYPT:
> >>> + if (ctx->enabled_steps & (1 << STEP_DECRYPT)) {
> >>> + INIT_WORK(&ctx->work, decrypt_work);
> >>> + fscrypt_enqueue_decrypt_work(&ctx->work);
> >>> + return;
> >>> + }
> >>> + ctx->cur_step++;
> >>> + /* fall-through */
> >>> + default:
> >>> + __read_end_io(ctx->bio);
> >>> + }
> >>
> >> How about introducing __bio_post_read_processing()
> >>
> >> switch (step) {
> >> case STEP_DECRYPT:
> >>...
> >>break;
> >> case STEP_COMPRESS:
> >>...
> >>break;
> >> case STEP_GENERIC:
> >>__read_end_io;
> >>break;
> >> ...
> >> }
> >>
> >> Then we can customize flexible read processes like:
> >>
> >> bio_post_read_processing()
> >> {
> >>if (encrypt_enabled)
> >>__bio_post_read_processing(, STEP_DECRYPT);
> >>if (compress_enabled)
> >>__bio_post_read_processing(, STEP_COMPRESS);
> >>__bio_post_read_processing(, STEP_GENERIC);
> >> }
> >>
> >> Or other flow.
> > 
> > If I understand correctly, you're suggesting that all the steps be done in a
> > single workqueue item?  The problem with that is that the verity work will
> 
> Yup,
> 
> > require I/O to the file to read hashes, which may need STEP_DECRYPT.  Hence,
> > decryption and verity will need separate workqueues.
> 
> For decryption and verity, the needs separated data, I agree that we can not
> merge the work into one workqueue.
> 
> As you mentioned in commit message, it can be used by compression later, so I
> just thought that for decryption and decompression, maybe we can do those work
> sequentially in one workqueue?
> 

Sure.  I'm not sure what you're asking me to do, though, since f2fs compression
doesn't exist yet.  If/when there are multiple steps that can be combined, then
bio_post_read_processing() can be updated to schedule them together.

> > 
> >>> @@ -481,29 +537,33 @@ static struct bio *f2fs_grab_read_bio(struct inode 
> >>> *inode, block_t blkaddr,
> >>>unsigned nr_pages)
> >>>  {
> >>>   struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
> >>> - struct fscrypt_ctx *ctx = NULL;
> >>>   struct bio *bio;
> >>> -
> >>> - if (f2fs_encrypted_file(inode)) {
> >>> - ctx = fscrypt_get_ctx(inode, GFP_NOFS);
> >>> - if (IS_ERR(ctx))
> >>> - return ERR_CAST(ctx);
> >>> -
> >>> - /* wait the page to be moved by cleaning */
> >>> - f2fs_wait_on_block_writeback(sbi, blkaddr);
> >>> - }
> >>> + struct bio_post_read_ctx *ctx;
> >>> + unsigned int post_read_steps = 0;
> >>>  
> >>>   bio = f2fs_bio_alloc(sbi, min_t(int, nr_pages, BIO_MAX_PAGES), false);
> >>> - if (!bio) {
> >>> - if (ctx)
> >>> - fscrypt_release_ctx(ctx);
> >>> + if (!bio)
> >>>   return ERR_PTR(-ENOMEM);
> >>> - }
> >>>   f2fs_target_device(sbi, blkaddr, bio);
> >>>   bio->bi_end_io = f2fs_read_end_io;
> >>> - bio->bi_private = ctx;
> >>
> >> bio->bi_private = NULL;
> >>
> > 
> > I don't see why.  ->bi_private is NULL by default.
> 
> As we will check bi_private in read_end_io anyway, if it is not NULL, we will
> parse it as an ctx, am I missing something?
> 

We're allocating a new bio.  New bios have NULL ->bi_private.

> Thanks,
> 
> > 
> >>> + bio_post_read_ctx_pool =
> >>> + mempool_create_slab_pool(128, bio_post_read_ctx_cache);
> >>
> >> #define MAX_POST_READ_CACHE_SIZE   128
> >>
> > 
> > Yes, that makes sense.
> > 

Actually it's the number of contexts preallocated in the mempool, so I'm going
to call it NUM_PREALLOC_POST_READ_CTXS.  It's similar to
'num_prealloc_crypto_ctxs' in fs/crypto/crypto.c.

Eric

--
Check out the vibrant tech community on one of the world's most
engaging tech sites, Slashdot.org! http://sdm.link/slashdot
___
Linux-f2fs-devel mailing list
Linux-f2fs-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel


Re: [f2fs-dev] [PATCH 1/5] f2fs: fix race in between GC and atomic open

2018-04-18 Thread Chao Yu
Hi all,

Please ignore this patch, I just sent this before, sorry.

Thanks,

On 2018/4/18 17:45, Chao Yu wrote:
> ThreadGC thread
> - f2fs_ioc_start_atomic_write
>  - get_dirty_pages
>  - filemap_write_and_wait_range
>   - f2fs_gc
>- do_garbage_collect
> - gc_data_segment
>  - move_data_page
>   - f2fs_is_atomic_file
>   - set_page_dirty
>  - set_inode_flag(, FI_ATOMIC_FILE)
> 
> Dirty data page can still be generated by GC in race condition as
> above call stack.
> 
> This patch adds fi->dio_rwsem[WRITE] in f2fs_ioc_start_atomic_write
> to avoid such race.
> 
> Signed-off-by: Chao Yu 
> ---
>  fs/f2fs/file.c | 3 +++
>  1 file changed, 3 insertions(+)
> 
> diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
> index 78b3a58cfe21..408471bf4799 100644
> --- a/fs/f2fs/file.c
> +++ b/fs/f2fs/file.c
> @@ -1677,6 +1677,8 @@ static int f2fs_ioc_start_atomic_write(struct file 
> *filp)
>  
>   inode_lock(inode);
>  
> + down_write(&F2FS_I(inode)->dio_rwsem[WRITE]);
> +
>   if (f2fs_is_atomic_file(inode))
>   goto out;
>  
> @@ -1702,6 +1704,7 @@ static int f2fs_ioc_start_atomic_write(struct file 
> *filp)
>   stat_inc_atomic_write(inode);
>   stat_update_max_atomic_write(inode);
>  out:
> + up_write(&F2FS_I(inode)->dio_rwsem[WRITE]);
>   inode_unlock(inode);
>   mnt_drop_write_file(filp);
>   return ret;
> 


--
Check out the vibrant tech community on one of the world's most
engaging tech sites, Slashdot.org! http://sdm.link/slashdot
___
Linux-f2fs-devel mailing list
Linux-f2fs-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel


[f2fs-dev] [PATCH v2 5/5] f2fs: fix to avoid race during access gc_thread pointer

2018-04-18 Thread Chao Yu
Thread AThread BThread C
- f2fs_remount
 - stop_gc_thread
- f2fs_sbi_store
- issue_discard_thread
   sbi->gc_thread = NULL;
  sbi->gc_thread->gc_wake = 1
  access 
sbi->gc_thread->gc_urgent

Previously, we allocate memory for sbi->gc_thread based on background
gc thread mount option, the memory can be released if we turn off
that mount option, but still there are several places access gc_thread
pointer without considering race condition, result in NULL point
dereference.

In order to fix this issue, keep gc_thread structure valid in sbi all
the time instead of alloc/free it dynamically.

Signed-off-by: Chao Yu 
---
v2: avoid double destroy_gc_context.
 fs/f2fs/debug.c   |  3 +--
 fs/f2fs/f2fs.h|  7 +++
 fs/f2fs/gc.c  | 58 +--
 fs/f2fs/segment.c |  4 ++--
 fs/f2fs/super.c   | 11 +--
 fs/f2fs/sysfs.c   |  8 
 6 files changed, 58 insertions(+), 33 deletions(-)

diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c
index 715beb85e9db..7bb036a3bb81 100644
--- a/fs/f2fs/debug.c
+++ b/fs/f2fs/debug.c
@@ -223,8 +223,7 @@ static void update_mem_info(struct f2fs_sb_info *sbi)
si->cache_mem = 0;
 
/* build gc */
-   if (sbi->gc_thread)
-   si->cache_mem += sizeof(struct f2fs_gc_kthread);
+   si->cache_mem += sizeof(struct f2fs_gc_kthread);
 
/* build merge flush thread */
if (SM_I(sbi)->fcc_info)
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 567c6bb57ae3..c553f63199e8 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -1412,6 +1412,11 @@ static inline struct sit_info *SIT_I(struct f2fs_sb_info 
*sbi)
return (struct sit_info *)(SM_I(sbi)->sit_info);
 }
 
+static inline struct f2fs_gc_kthread *GC_I(struct f2fs_sb_info *sbi)
+{
+   return (struct f2fs_gc_kthread *)(sbi->gc_thread);
+}
+
 static inline struct free_segmap_info *FREE_I(struct f2fs_sb_info *sbi)
 {
return (struct free_segmap_info *)(SM_I(sbi)->free_info);
@@ -2954,6 +2959,8 @@ bool f2fs_overwrite_io(struct inode *inode, loff_t pos, 
size_t len);
 /*
  * gc.c
  */
+int init_gc_context(struct f2fs_sb_info *sbi);
+void destroy_gc_context(struct f2fs_sb_info * sbi);
 int start_gc_thread(struct f2fs_sb_info *sbi);
 void stop_gc_thread(struct f2fs_sb_info *sbi);
 block_t start_bidx_of_node(unsigned int node_ofs, struct inode *inode);
diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
index da89ca16a55d..7d310e454b77 100644
--- a/fs/f2fs/gc.c
+++ b/fs/f2fs/gc.c
@@ -26,8 +26,8 @@
 static int gc_thread_func(void *data)
 {
struct f2fs_sb_info *sbi = data;
-   struct f2fs_gc_kthread *gc_th = sbi->gc_thread;
-	wait_queue_head_t *wq = &sbi->gc_thread->gc_wait_queue_head;
+   struct f2fs_gc_kthread *gc_th = GC_I(sbi);
+	wait_queue_head_t *wq = &gc_th->gc_wait_queue_head;
unsigned int wait_ms;
 
wait_ms = gc_th->min_sleep_time;
@@ -114,17 +114,15 @@ static int gc_thread_func(void *data)
return 0;
 }
 
-int start_gc_thread(struct f2fs_sb_info *sbi)
+int init_gc_context(struct f2fs_sb_info *sbi)
 {
struct f2fs_gc_kthread *gc_th;
-   dev_t dev = sbi->sb->s_bdev->bd_dev;
-   int err = 0;
 
gc_th = f2fs_kmalloc(sbi, sizeof(struct f2fs_gc_kthread), GFP_KERNEL);
-   if (!gc_th) {
-   err = -ENOMEM;
-   goto out;
-   }
+   if (!gc_th)
+   return -ENOMEM;
+
+   gc_th->f2fs_gc_task = NULL;
 
gc_th->urgent_sleep_time = DEF_GC_THREAD_URGENT_SLEEP_TIME;
gc_th->min_sleep_time = DEF_GC_THREAD_MIN_SLEEP_TIME;
@@ -139,26 +137,41 @@ int start_gc_thread(struct f2fs_sb_info *sbi)
gc_th->atomic_file[FG_GC] = 0;
 
sbi->gc_thread = gc_th;
-	init_waitqueue_head(&sbi->gc_thread->gc_wait_queue_head);
-   sbi->gc_thread->f2fs_gc_task = kthread_run(gc_thread_func, sbi,
+
+   return 0;
+}
+
+void destroy_gc_context(struct f2fs_sb_info *sbi)
+{
+   kfree(GC_I(sbi));
+   sbi->gc_thread = NULL;
+}
+
+int start_gc_thread(struct f2fs_sb_info *sbi)
+{
+   struct f2fs_gc_kthread *gc_th = GC_I(sbi);
+   dev_t dev = sbi->sb->s_bdev->bd_dev;
+   int err = 0;
+
+	init_waitqueue_head(&gc_th->gc_wait_queue_head);
+   gc_th->f2fs_gc_task = kthread_run(gc_thread_func, sbi,
"f2fs_gc-%u:%u", MAJOR(dev), MINOR(dev));
if (IS_ERR(gc_th->f2fs_gc_task)) {
err = PTR_ERR(gc_th->f2fs_gc_task);
-   kfree(gc_th);
-   sbi->gc_thread = NULL;
+   gc_th->f2fs_gc_task = NULL;
}
-out:
+
return err;
 }
 
 void stop_gc_thread(struct f2fs_sb_info *sbi)
 {
-   struct f2fs_gc_kthread *gc_th = sbi->gc_thread;
-   if (!gc_th)
-   return;
-   

[f2fs-dev] [PATCH 1/5] f2fs: fix race in between GC and atomic open

2018-04-18 Thread Chao Yu
Thread  GC thread
- f2fs_ioc_start_atomic_write
 - get_dirty_pages
 - filemap_write_and_wait_range
- f2fs_gc
 - do_garbage_collect
  - gc_data_segment
   - move_data_page
- f2fs_is_atomic_file
- set_page_dirty
 - set_inode_flag(, FI_ATOMIC_FILE)

Dirty data page can still be generated by GC in race condition as
above call stack.

This patch adds fi->dio_rwsem[WRITE] in f2fs_ioc_start_atomic_write
to avoid such race.

Signed-off-by: Chao Yu 
---
 fs/f2fs/file.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index 78b3a58cfe21..408471bf4799 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -1677,6 +1677,8 @@ static int f2fs_ioc_start_atomic_write(struct file *filp)
 
inode_lock(inode);
 
> +	down_write(&F2FS_I(inode)->dio_rwsem[WRITE]);
+
if (f2fs_is_atomic_file(inode))
goto out;
 
@@ -1702,6 +1704,7 @@ static int f2fs_ioc_start_atomic_write(struct file *filp)
stat_inc_atomic_write(inode);
stat_update_max_atomic_write(inode);
 out:
> +	up_write(&F2FS_I(inode)->dio_rwsem[WRITE]);
inode_unlock(inode);
mnt_drop_write_file(filp);
return ret;
-- 
2.15.0.55.gc2ece9dc4de6


--
Check out the vibrant tech community on one of the world's most
engaging tech sites, Slashdot.org! http://sdm.link/slashdot
___
Linux-f2fs-devel mailing list
Linux-f2fs-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel


[f2fs-dev] [PATCH 2/5] f2fs: fix return value in f2fs_ioc_commit_atomic_write

2018-04-18 Thread Chao Yu
In f2fs_ioc_commit_atomic_write, if file is volatile, return -EINVAL to
indicate that commit failure.

Signed-off-by: Chao Yu 
---
 fs/f2fs/file.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index 408471bf4799..7c90ded5a431 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -1726,8 +1726,10 @@ static int f2fs_ioc_commit_atomic_write(struct file 
*filp)
 
 	down_write(&F2FS_I(inode)->dio_rwsem[WRITE]);
 
-   if (f2fs_is_volatile_file(inode))
+   if (f2fs_is_volatile_file(inode)) {
+   ret = -EINVAL;
goto err_out;
+   }
 
if (f2fs_is_atomic_file(inode)) {
ret = commit_inmem_pages(inode);
-- 
2.15.0.55.gc2ece9dc4de6


--
Check out the vibrant tech community on one of the world's most
engaging tech sites, Slashdot.org! http://sdm.link/slashdot
___
Linux-f2fs-devel mailing list
Linux-f2fs-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel


[f2fs-dev] [PATCH 3/5] f2fs: avoid stucking GC due to atomic write

2018-04-18 Thread Chao Yu
f2fs doesn't allow abuse on atomic write class interface, so except
limiting in-mem pages' total memory usage capacity, we need to limit
atomic-write usage as well when filesystem is seriously fragmented,
otherwise we may run into infinite loop during foreground GC because
target blocks in victim segment are belong to atomic opened file for
long time.

Now, we will detect failure due to atomic write in foreground GC, if
the count exceeds threshold, we will drop all atomic written data in
cache, by this, I expect it can keep our system running safely to
prevent Dos attack.

Signed-off-by: Chao Yu 
---
 fs/f2fs/f2fs.h|  1 +
 fs/f2fs/file.c|  5 +
 fs/f2fs/gc.c  | 27 +++
 fs/f2fs/gc.h  |  3 +++
 fs/f2fs/segment.c |  1 +
 fs/f2fs/segment.h |  2 ++
 6 files changed, 35 insertions(+), 4 deletions(-)

diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index c1c3a1d11186..3453288d6a71 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -2249,6 +2249,7 @@ enum {
FI_EXTRA_ATTR,  /* indicate file has extra attribute */
FI_PROJ_INHERIT,/* indicate file inherits projectid */
FI_PIN_FILE,/* indicate file should not be gced */
+   FI_ATOMIC_REVOKE_REQUEST,/* indicate atomic committed data has been 
dropped */
 };
 
 static inline void __mark_inode_dirty_flag(struct inode *inode,
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index 7c90ded5a431..cddd9aee1bb2 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -1698,6 +1698,7 @@ static int f2fs_ioc_start_atomic_write(struct file *filp)
 skip_flush:
set_inode_flag(inode, FI_HOT_DATA);
set_inode_flag(inode, FI_ATOMIC_FILE);
+   clear_inode_flag(inode, FI_ATOMIC_REVOKE_REQUEST);
f2fs_update_time(F2FS_I_SB(inode), REQ_TIME);
 
F2FS_I(inode)->inmem_task = current;
@@ -1746,6 +1747,10 @@ static int f2fs_ioc_commit_atomic_write(struct file 
*filp)
ret = f2fs_do_sync_file(filp, 0, LLONG_MAX, 1, false);
}
 err_out:
+   if (is_inode_flag_set(inode, FI_ATOMIC_REVOKE_REQUEST)) {
+   clear_inode_flag(inode, FI_ATOMIC_REVOKE_REQUEST);
+   ret = -EINVAL;
+   }
up_write(_I(inode)->dio_rwsem[WRITE]);
inode_unlock(inode);
mnt_drop_write_file(filp);
diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
index bfb7a4a3a929..495876ca62b6 100644
--- a/fs/f2fs/gc.c
+++ b/fs/f2fs/gc.c
@@ -135,6 +135,8 @@ int start_gc_thread(struct f2fs_sb_info *sbi)
gc_th->gc_urgent = 0;
gc_th->gc_wake= 0;
 
+   gc_th->atomic_file = 0;
+
sbi->gc_thread = gc_th;
init_waitqueue_head(>gc_thread->gc_wait_queue_head);
sbi->gc_thread->f2fs_gc_task = kthread_run(gc_thread_func, sbi,
@@ -603,7 +605,7 @@ static bool is_alive(struct f2fs_sb_info *sbi, struct 
f2fs_summary *sum,
  * This can be used to move blocks, aka LBAs, directly on disk.
  */
 static void move_data_block(struct inode *inode, block_t bidx,
-   unsigned int segno, int off)
+   int gc_type, unsigned int segno, int off)
 {
struct f2fs_io_info fio = {
.sbi = F2FS_I_SB(inode),
@@ -630,8 +632,10 @@ static void move_data_block(struct inode *inode, block_t 
bidx,
if (!check_valid_map(F2FS_I_SB(inode), segno, off))
goto out;
 
-   if (f2fs_is_atomic_file(inode))
+   if (f2fs_is_atomic_file(inode)) {
+   F2FS_I_SB(inode)->gc_thread->atomic_file++;
goto out;
+   }
 
if (f2fs_is_pinned_file(inode)) {
f2fs_pin_file_control(inode, true);
@@ -737,8 +741,10 @@ static void move_data_page(struct inode *inode, block_t 
bidx, int gc_type,
if (!check_valid_map(F2FS_I_SB(inode), segno, off))
goto out;
 
-   if (f2fs_is_atomic_file(inode))
+   if (f2fs_is_atomic_file(inode)) {
+   F2FS_I_SB(inode)->gc_thread->atomic_file++;
goto out;
+   }
if (f2fs_is_pinned_file(inode)) {
if (gc_type == FG_GC)
f2fs_pin_file_control(inode, true);
@@ -900,7 +906,8 @@ static void gc_data_segment(struct f2fs_sb_info *sbi, 
struct f2fs_summary *sum,
start_bidx = start_bidx_of_node(nofs, inode)
+ ofs_in_node;
if (f2fs_encrypted_file(inode))
-   move_data_block(inode, start_bidx, segno, off);
+   move_data_block(inode, start_bidx, gc_type,
+   segno, off);
else
move_data_page(inode, start_bidx, gc_type,
segno, off);
@@ -1017,6 +1024,8 @@ int f2fs_gc(struct f2fs_sb_info *sbi, bool sync,
.ilist = 

[f2fs-dev] [PATCH 4/5] f2fs: show GC failure info in debugfs

2018-04-18 Thread Chao Yu
This patch adds to show GC failure information in debugfs, now it just
shows count of failure caused by atomic write.

Signed-off-by: Chao Yu 
---
 fs/f2fs/debug.c |  5 +
 fs/f2fs/f2fs.h  |  1 +
 fs/f2fs/gc.c| 13 +++--
 fs/f2fs/gc.h|  2 +-
 4 files changed, 14 insertions(+), 7 deletions(-)

diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c
index a66107b5cfff..715beb85e9db 100644
--- a/fs/f2fs/debug.c
+++ b/fs/f2fs/debug.c
@@ -104,6 +104,8 @@ static void update_general_status(struct f2fs_sb_info *sbi)
si->avail_nids = NM_I(sbi)->available_nids;
si->alloc_nids = NM_I(sbi)->nid_cnt[PREALLOC_NID];
si->bg_gc = sbi->bg_gc;
+   si->bg_atomic = sbi->gc_thread->atomic_file[BG_GC];
+   si->fg_atomic = sbi->gc_thread->atomic_file[FG_GC];
si->util_free = (int)(free_user_blocks(sbi) >> sbi->log_blocks_per_seg)
* 100 / (int)(sbi->user_block_count >> sbi->log_blocks_per_seg)
/ 2;
@@ -342,6 +344,9 @@ static int stat_show(struct seq_file *s, void *v)
si->bg_data_blks);
seq_printf(s, "  - node blocks : %d (%d)\n", si->node_blks,
si->bg_node_blks);
+   seq_printf(s, "Failure : atomic write %d (%d)\n",
+   si->bg_atomic + si->fg_atomic,
+   si->bg_atomic);
seq_puts(s, "\nExtent Cache:\n");
seq_printf(s, "  - Hit Count: L1-1:%llu L1-2:%llu L2:%llu\n",
si->hit_largest, si->hit_cached,
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 3453288d6a71..567c6bb57ae3 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -3003,6 +3003,7 @@ struct f2fs_stat_info {
int bg_node_segs, bg_data_segs;
int tot_blks, data_blks, node_blks;
int bg_data_blks, bg_node_blks;
+   unsigned int bg_atomic, fg_atomic;
int curseg[NR_CURSEG_TYPE];
int cursec[NR_CURSEG_TYPE];
int curzone[NR_CURSEG_TYPE];
diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
index 495876ca62b6..da89ca16a55d 100644
--- a/fs/f2fs/gc.c
+++ b/fs/f2fs/gc.c
@@ -135,7 +135,8 @@ int start_gc_thread(struct f2fs_sb_info *sbi)
gc_th->gc_urgent = 0;
gc_th->gc_wake= 0;
 
-   gc_th->atomic_file = 0;
+   gc_th->atomic_file[BG_GC] = 0;
+   gc_th->atomic_file[FG_GC] = 0;
 
sbi->gc_thread = gc_th;
init_waitqueue_head(>gc_thread->gc_wait_queue_head);
@@ -633,7 +634,7 @@ static void move_data_block(struct inode *inode, block_t 
bidx,
goto out;
 
if (f2fs_is_atomic_file(inode)) {
-   F2FS_I_SB(inode)->gc_thread->atomic_file++;
+   F2FS_I_SB(inode)->gc_thread->atomic_file[gc_type]++;
goto out;
}
 
@@ -742,7 +743,7 @@ static void move_data_page(struct inode *inode, block_t 
bidx, int gc_type,
goto out;
 
if (f2fs_is_atomic_file(inode)) {
-   F2FS_I_SB(inode)->gc_thread->atomic_file++;
+   F2FS_I_SB(inode)->gc_thread->atomic_file[gc_type]++;
goto out;
}
if (f2fs_is_pinned_file(inode)) {
@@ -1024,7 +1025,7 @@ int f2fs_gc(struct f2fs_sb_info *sbi, bool sync,
.ilist = LIST_HEAD_INIT(gc_list.ilist),
.iroot = RADIX_TREE_INIT(GFP_NOFS),
};
-   unsigned int last_atomic_file = sbi->gc_thread->atomic_file;
+   unsigned int last_atomic_file = sbi->gc_thread->atomic_file[FG_GC];
unsigned int skipped_round = 0, round = 0;
 
trace_f2fs_gc_begin(sbi->sb, sync, background,
@@ -1078,9 +1079,9 @@ int f2fs_gc(struct f2fs_sb_info *sbi, bool sync,
total_freed += seg_freed;
 
if (gc_type == FG_GC) {
-   if (sbi->gc_thread->atomic_file > last_atomic_file)
+   if (sbi->gc_thread->atomic_file[FG_GC] > last_atomic_file)
skipped_round++;
-   last_atomic_file = sbi->gc_thread->atomic_file;
+   last_atomic_file = sbi->gc_thread->atomic_file[FG_GC];
round++;
}
 
diff --git a/fs/f2fs/gc.h b/fs/f2fs/gc.h
index bc1d21d46ae7..a6cffe6b249b 100644
--- a/fs/f2fs/gc.h
+++ b/fs/f2fs/gc.h
@@ -41,7 +41,7 @@ struct f2fs_gc_kthread {
unsigned int gc_wake;
 
/* for stuck statistic */
-   unsigned int atomic_file;
+   unsigned int atomic_file[2];
 };
 
 struct gc_inode_list {
-- 
2.15.0.55.gc2ece9dc4de6


--
Check out the vibrant tech community on one of the world's most
engaging tech sites, Slashdot.org! http://sdm.link/slashdot
___
Linux-f2fs-devel mailing list
Linux-f2fs-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel


[f2fs-dev] [PATCH 5/5] f2fs: fix to avoid race during access gc_thread pointer

2018-04-18 Thread Chao Yu
Thread AThread BThread C
- f2fs_remount
 - stop_gc_thread
- f2fs_sbi_store
- issue_discard_thread
   sbi->gc_thread = NULL;
  sbi->gc_thread->gc_wake = 1
  access 
sbi->gc_thread->gc_urgent

Previously, we allocated memory for sbi->gc_thread based on the background
gc thread mount option; the memory can be released if we turn off
that mount option. But still there are several places that access the
gc_thread pointer without considering the race condition, resulting in a
NULL pointer dereference.

In order to fix this issue, keep gc_thread structure valid in sbi all
the time instead of alloc/free it dynamically.

Signed-off-by: Chao Yu 
---
 fs/f2fs/debug.c   |  3 +--
 fs/f2fs/f2fs.h|  7 +++
 fs/f2fs/gc.c  | 58 +--
 fs/f2fs/segment.c |  4 ++--
 fs/f2fs/super.c   | 13 +++--
 fs/f2fs/sysfs.c   |  8 
 6 files changed, 60 insertions(+), 33 deletions(-)

diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c
index 715beb85e9db..7bb036a3bb81 100644
--- a/fs/f2fs/debug.c
+++ b/fs/f2fs/debug.c
@@ -223,8 +223,7 @@ static void update_mem_info(struct f2fs_sb_info *sbi)
si->cache_mem = 0;
 
/* build gc */
-   if (sbi->gc_thread)
-   si->cache_mem += sizeof(struct f2fs_gc_kthread);
+   si->cache_mem += sizeof(struct f2fs_gc_kthread);
 
/* build merge flush thread */
if (SM_I(sbi)->fcc_info)
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 567c6bb57ae3..c553f63199e8 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -1412,6 +1412,11 @@ static inline struct sit_info *SIT_I(struct f2fs_sb_info 
*sbi)
return (struct sit_info *)(SM_I(sbi)->sit_info);
 }
 
+static inline struct f2fs_gc_kthread *GC_I(struct f2fs_sb_info *sbi)
+{
+   return (struct f2fs_gc_kthread *)(sbi->gc_thread);
+}
+
 static inline struct free_segmap_info *FREE_I(struct f2fs_sb_info *sbi)
 {
return (struct free_segmap_info *)(SM_I(sbi)->free_info);
@@ -2954,6 +2959,8 @@ bool f2fs_overwrite_io(struct inode *inode, loff_t pos, 
size_t len);
 /*
  * gc.c
  */
+int init_gc_context(struct f2fs_sb_info *sbi);
+void destroy_gc_context(struct f2fs_sb_info * sbi);
 int start_gc_thread(struct f2fs_sb_info *sbi);
 void stop_gc_thread(struct f2fs_sb_info *sbi);
 block_t start_bidx_of_node(unsigned int node_ofs, struct inode *inode);
diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
index da89ca16a55d..7d310e454b77 100644
--- a/fs/f2fs/gc.c
+++ b/fs/f2fs/gc.c
@@ -26,8 +26,8 @@
 static int gc_thread_func(void *data)
 {
struct f2fs_sb_info *sbi = data;
-   struct f2fs_gc_kthread *gc_th = sbi->gc_thread;
-   wait_queue_head_t *wq = >gc_thread->gc_wait_queue_head;
+   struct f2fs_gc_kthread *gc_th = GC_I(sbi);
+   wait_queue_head_t *wq = _th->gc_wait_queue_head;
unsigned int wait_ms;
 
wait_ms = gc_th->min_sleep_time;
@@ -114,17 +114,15 @@ static int gc_thread_func(void *data)
return 0;
 }
 
-int start_gc_thread(struct f2fs_sb_info *sbi)
+int init_gc_context(struct f2fs_sb_info *sbi)
 {
struct f2fs_gc_kthread *gc_th;
-   dev_t dev = sbi->sb->s_bdev->bd_dev;
-   int err = 0;
 
gc_th = f2fs_kmalloc(sbi, sizeof(struct f2fs_gc_kthread), GFP_KERNEL);
-   if (!gc_th) {
-   err = -ENOMEM;
-   goto out;
-   }
+   if (!gc_th)
+   return -ENOMEM;
+
+   gc_th->f2fs_gc_task = NULL;
 
gc_th->urgent_sleep_time = DEF_GC_THREAD_URGENT_SLEEP_TIME;
gc_th->min_sleep_time = DEF_GC_THREAD_MIN_SLEEP_TIME;
@@ -139,26 +137,41 @@ int start_gc_thread(struct f2fs_sb_info *sbi)
gc_th->atomic_file[FG_GC] = 0;
 
sbi->gc_thread = gc_th;
-   init_waitqueue_head(>gc_thread->gc_wait_queue_head);
-   sbi->gc_thread->f2fs_gc_task = kthread_run(gc_thread_func, sbi,
+
+   return 0;
+}
+
+void destroy_gc_context(struct f2fs_sb_info *sbi)
+{
+   kfree(GC_I(sbi));
+   sbi->gc_thread = NULL;
+}
+
+int start_gc_thread(struct f2fs_sb_info *sbi)
+{
+   struct f2fs_gc_kthread *gc_th = GC_I(sbi);
+   dev_t dev = sbi->sb->s_bdev->bd_dev;
+   int err = 0;
+
+   init_waitqueue_head(_th->gc_wait_queue_head);
+   gc_th->f2fs_gc_task = kthread_run(gc_thread_func, sbi,
"f2fs_gc-%u:%u", MAJOR(dev), MINOR(dev));
if (IS_ERR(gc_th->f2fs_gc_task)) {
err = PTR_ERR(gc_th->f2fs_gc_task);
-   kfree(gc_th);
-   sbi->gc_thread = NULL;
+   gc_th->f2fs_gc_task = NULL;
}
-out:
+
return err;
 }
 
 void stop_gc_thread(struct f2fs_sb_info *sbi)
 {
-   struct f2fs_gc_kthread *gc_th = sbi->gc_thread;
-   if (!gc_th)
-   return;
-   kthread_stop(gc_th->f2fs_gc_task);
-   kfree(gc_th);

Re: [f2fs-dev] [PATCH v2] f2fs: allocate hot_data for atomic write more strictly

2018-04-18 Thread Chao Yu
On 2018/4/18 11:06, Yunlei He wrote:
> If a file not set type as hot, has dirty pages more than
> threshold 64 before starting atomic write, may be lose hot
> flag.
> 
> v1->v2: move set FI_ATOMIC_FILE flag behind flush dirty pages too,
> in case of dirty pages before starting atomic use atomic mode to
> write back.
> 
> Signed-off-by: Yunlei He 

Reviewed-by: Chao Yu 

Thanks,


--
Check out the vibrant tech community on one of the world's most
engaging tech sites, Slashdot.org! http://sdm.link/slashdot
___
Linux-f2fs-devel mailing list
Linux-f2fs-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel


Re: [f2fs-dev] [PATCH 2/2] f2fs: refactor read path to allow multiple postprocessing steps

2018-04-18 Thread Chao Yu
Hi Eric,

On 2018/4/18 1:42, Eric Biggers wrote:
> Hi Chao,
> 
> On Tue, Apr 17, 2018 at 05:13:12PM +0800, Chao Yu wrote:
>>> +
>>> +static void bio_post_read_processing(struct bio_post_read_ctx *ctx);
>>> +
>>> +static void decrypt_work(struct work_struct *work)
>>> +{
>>> +   struct bio_post_read_ctx *ctx =
>>> +   container_of(work, struct bio_post_read_ctx, work);
>>> +
>>> +   fscrypt_decrypt_bio(ctx->bio);
>>> +
>>> +   bio_post_read_processing(ctx);
>>> +}
>>> +
>>> +static void bio_post_read_processing(struct bio_post_read_ctx *ctx)
>>> +{
>>> +   switch (++ctx->cur_step) {
>>> +   case STEP_DECRYPT:
>>> +   if (ctx->enabled_steps & (1 << STEP_DECRYPT)) {
>>> +   INIT_WORK(>work, decrypt_work);
>>> +   fscrypt_enqueue_decrypt_work(>work);
>>> +   return;
>>> +   }
>>> +   ctx->cur_step++;
>>> +   /* fall-through */
>>> +   default:
>>> +   __read_end_io(ctx->bio);
>>> +   }
>>
>> How about introducing __bio_post_read_processing()
>>
>> switch (step) {
>> case STEP_DECRYPT:
>>  ...
>>  break;
>> case STEP_COMPRESS:
>>  ...
>>  break;
>> case STEP_GENERIC:
>>  __read_end_io;
>>  break;
>> ...
>> }
>>
>> Then we can customize flexible read processes like:
>>
>> bio_post_read_processing()
>> {
>>  if (encrypt_enabled)
>>  __bio_post_read_processing(, STEP_DECRYPT);
>>  if (compress_enabled)
>>  __bio_post_read_processing(, STEP_COMPRESS);
>>  __bio_post_read_processing(, STEP_GENERIC);
>> }
>>
>> Or other flow.
> 
> If I understand correctly, you're suggesting that all the steps be done in a
> single workqueue item?  The problem with that is that the verity work will

Yup,

> require I/O to the file to read hashes, which may need STEP_DECRYPT.  Hence,
> decryption and verity will need separate workqueues.

For decryption and verity, since they need separate data, I agree that we
cannot merge the work into one workqueue.

As you mentioned in the commit message, it can be used by compression later, so I
just thought that for decryption and decompression, maybe we can do that work
sequentially in one workqueue?

> 
>>> @@ -481,29 +537,33 @@ static struct bio *f2fs_grab_read_bio(struct inode 
>>> *inode, block_t blkaddr,
>>>  unsigned nr_pages)
>>>  {
>>> struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
>>> -   struct fscrypt_ctx *ctx = NULL;
>>> struct bio *bio;
>>> -
>>> -   if (f2fs_encrypted_file(inode)) {
>>> -   ctx = fscrypt_get_ctx(inode, GFP_NOFS);
>>> -   if (IS_ERR(ctx))
>>> -   return ERR_CAST(ctx);
>>> -
>>> -   /* wait the page to be moved by cleaning */
>>> -   f2fs_wait_on_block_writeback(sbi, blkaddr);
>>> -   }
>>> +   struct bio_post_read_ctx *ctx;
>>> +   unsigned int post_read_steps = 0;
>>>  
>>> bio = f2fs_bio_alloc(sbi, min_t(int, nr_pages, BIO_MAX_PAGES), false);
>>> -   if (!bio) {
>>> -   if (ctx)
>>> -   fscrypt_release_ctx(ctx);
>>> +   if (!bio)
>>> return ERR_PTR(-ENOMEM);
>>> -   }
>>> f2fs_target_device(sbi, blkaddr, bio);
>>> bio->bi_end_io = f2fs_read_end_io;
>>> -   bio->bi_private = ctx;
>>
>> bio->bi_private = NULL;
>>
> 
> I don't see why.  ->bi_private is NULL by default.

As we will check bi_private in read_end_io anyway, if it is not NULL, we will
parse it as a ctx — am I missing something?

Thanks,

> 
>>> +   bio_post_read_ctx_pool =
>>> +   mempool_create_slab_pool(128, bio_post_read_ctx_cache);
>>
>> #define MAX_POST_READ_CACHE_SIZE 128
>>
> 
> Yes, that makes sense.
> 
> - Eric
> 
> .
> 


--
Check out the vibrant tech community on one of the world's most
engaging tech sites, Slashdot.org! http://sdm.link/slashdot
___
Linux-f2fs-devel mailing list
Linux-f2fs-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel