Re: [Cluster-devel] [PATCH 02/14] fs/buffer: add some new buffer read helpers

2022-08-31 Thread Jan Kara
On Wed 31-08-22 15:20:59, Zhang Yi wrote:
> Current ll_rw_block() helper is fragile because it assumes that a locked
> buffer means it is under IO submitted by someone else who holds the
> lock, and it skips the buffer if it fails to get the lock, so it is only
> safe on the readahead path. Unfortunately, most filesystems still use
> this helper mistakenly on the sync metadata read path. There is no
> guarantee that the holder of the buffer lock always submits IO
> (e.g. buffer_migrate_folio_norefs() after commit 88dbcbb3a484 ("blkdev:
> avoid migration stalls for blkdev pages")), which could lead to a false
> positive -EIO when submitting read IO.
> 
> This patch adds some friendly buffer read helpers to prepare for
> replacing ll_rw_block() and similar calls. We can only call the
> bh_readahead_*() helpers on the readahead paths.
> 
> Signed-off-by: Zhang Yi 

This looks mostly good. Just a few small nits below.

> diff --git a/fs/buffer.c b/fs/buffer.c
> index a0b70b3239f3..a663191903ed 100644
> --- a/fs/buffer.c
> +++ b/fs/buffer.c
> @@ -3017,6 +3017,74 @@ int bh_uptodate_or_lock(struct buffer_head *bh)
>  }
>  EXPORT_SYMBOL(bh_uptodate_or_lock);
>  
> +/**
> + * __bh_read - Submit read for a locked buffer
> + * @bh: struct buffer_head
> + * @op_flags: appending REQ_OP_* flags besides REQ_OP_READ
> + * @wait: wait until reading finishes
> + *
> + * Returns zero on success (or when not waiting), and -EIO on error.
> + */
> +int __bh_read(struct buffer_head *bh, blk_opf_t op_flags, bool wait)
> +{
> + int ret = 0;
> +
> + BUG_ON(!buffer_locked(bh));
> +
> + if (buffer_uptodate(bh)) {
> + unlock_buffer(bh);
> + return ret;
> + }
> +
> + get_bh(bh);
> + bh->b_end_io = end_buffer_read_sync;
> + submit_bh(REQ_OP_READ | op_flags, bh);
> + if (wait) {
> + wait_on_buffer(bh);
> + if (!buffer_uptodate(bh))
> + ret = -EIO;
> + }
> + return ret;
> +}
> +EXPORT_SYMBOL(__bh_read);
> +
> +/**
> + * __bh_read_batch - Submit read for a batch of unlocked buffers
> + * @bhs: a batch of struct buffer_head
> + * @nr: number of buffer_heads in the batch
> + * @op_flags: appending REQ_OP_* flags besides REQ_OP_READ
> + * @force_lock: force to get a lock on the buffer if set, otherwise drops any
> + *  buffer that cannot be locked.
> + *
> + * Submits read requests for the whole batch; does not wait for IO completion.
> + */
> +void __bh_read_batch(struct buffer_head *bhs[],
> +  int nr, blk_opf_t op_flags, bool force_lock)
> +{
> + int i;
> +
> + for (i = 0; i < nr; i++) {
> + struct buffer_head *bh = bhs[i];
> +
> + if (buffer_uptodate(bh))
> + continue;
> + if (!trylock_buffer(bh)) {
> + if (!force_lock)
> + continue;
> + lock_buffer(bh);
> + }

This would be a bit more efficient for the force_lock case like:

if (force_lock)
lock_buffer(bh);
else
if (!trylock_buffer(bh))
continue;

> + if (buffer_uptodate(bh)) {
> + unlock_buffer(bh);
> + continue;
> + }
> +
> + bh->b_end_io = end_buffer_read_sync;
> + get_bh(bh);
> + submit_bh(REQ_OP_READ | op_flags, bh);
> + }
> +}
> +EXPORT_SYMBOL(__bh_read_batch);
> +
>  /**
>   * bh_submit_read - Submit a locked buffer for reading
>   * @bh: struct buffer_head
> diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h
> index c3863c417b00..8a01c07c0418 100644
> --- a/include/linux/buffer_head.h
> +++ b/include/linux/buffer_head.h
> @@ -232,6 +232,9 @@ void write_boundary_block(struct block_device *bdev,
>   sector_t bblock, unsigned blocksize);
>  int bh_uptodate_or_lock(struct buffer_head *bh);
>  int bh_submit_read(struct buffer_head *bh);
> +int __bh_read(struct buffer_head *bh, blk_opf_t op_flags, bool wait);
> +void __bh_read_batch(struct buffer_head *bhs[],
> +  int nr, blk_opf_t op_flags, bool force_lock);
>  
>  extern int buffer_heads_over_limit;
>  
> @@ -399,6 +402,40 @@ static inline struct buffer_head *__getblk(struct 
> block_device *bdev,
>   return __getblk_gfp(bdev, block, size, __GFP_MOVABLE);
>  }
>  
> +static inline void bh_readahead(struct buffer_head *bh, blk_opf_t op_flags)
> +{
> + if (trylock_buffer(bh))
> + __bh_read(bh, op_flags, false);
> +}
> +
> +static inline void bh_read_nowait(struct buffer_head *bh, blk_opf_t op_flags)
> +{
> + lock_buffer(bh);
> + __bh_read(bh, op_flags, false);
> +}
> +
> +static inline int bh_read(struct buffer_head *bh, blk_opf_t op_flags)
> +{
> + lock_buffer(bh);
> + return __bh_read(bh, op_flags, true);
> +}

I would use the bh_uptodate_or_lock() helper in the above two functions to
avoid locking the buffer in case it is already uptodate.

Re: [Cluster-devel] [PATCH 08/14] ocfs2: replace ll_rw_block()

2022-08-31 Thread Jan Kara
On Wed 31-08-22 15:21:05, Zhang Yi wrote:
> ll_rw_block() is not safe for the sync read path because it cannot
> guarantee that submitting read IO if the buffer has been locked. We
> could get false positive EIO after wait_on_buffer() if the buffer has
> been locked by others. So stop using ll_rw_block() in ocfs2.
> 
> Signed-off-by: Zhang Yi 
> ---
>  fs/ocfs2/aops.c  | 2 +-
>  fs/ocfs2/super.c | 5 +
>  2 files changed, 2 insertions(+), 5 deletions(-)
> 
> diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
> index af4157f61927..1d65f6ef00ca 100644
> --- a/fs/ocfs2/aops.c
> +++ b/fs/ocfs2/aops.c
> @@ -636,7 +636,7 @@ int ocfs2_map_page_blocks(struct page *page, u64 *p_blkno,
>  !buffer_new(bh) &&
>  ocfs2_should_read_blk(inode, page, block_start) &&
>  (block_start < from || block_end > to)) {
> - ll_rw_block(REQ_OP_READ, 1, &bh);
> + bh_read_nowait(bh, 0);
>   *wait_bh++=bh;
>   }
>  
> diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
> index e2cc9eec287c..4050f35bbd0c 100644
> --- a/fs/ocfs2/super.c
> +++ b/fs/ocfs2/super.c
> @@ -1763,10 +1763,7 @@ static int ocfs2_get_sector(struct super_block *sb,
>   lock_buffer(*bh);
>   if (!buffer_dirty(*bh))
>   clear_buffer_uptodate(*bh);
> - unlock_buffer(*bh);
> - ll_rw_block(REQ_OP_READ, 1, bh);
> - wait_on_buffer(*bh);
> - if (!buffer_uptodate(*bh)) {
> + if (bh_read_locked(*bh, 0)) {
>   mlog_errno(-EIO);
>   brelse(*bh);
>   *bh = NULL;

I would just use bh_read() here and drop bh_read_locked() altogether...

Honza
-- 
Jan Kara 
SUSE Labs, CR



Re: [Cluster-devel] [PATCH 14/14] fs/buffer: remove bh_submit_read() helper

2022-08-31 Thread Jan Kara
On Wed 31-08-22 15:21:11, Zhang Yi wrote:
> bh_submit_read() has no user anymore, just remove it.
> 
> Signed-off-by: Zhang Yi 

Looks good. Feel free to add:

Reviewed-by: Jan Kara 

Honza

> ---
>  fs/buffer.c | 25 -
>  include/linux/buffer_head.h |  1 -
>  2 files changed, 26 deletions(-)
> 
> diff --git a/fs/buffer.c b/fs/buffer.c
> index d1d09e2dacc2..fa7c2dbcef4c 100644
> --- a/fs/buffer.c
> +++ b/fs/buffer.c
> @@ -3029,31 +3029,6 @@ void __bh_read_batch(struct buffer_head *bhs[],
>  }
>  EXPORT_SYMBOL(__bh_read_batch);
>  
> -/**
> - * bh_submit_read - Submit a locked buffer for reading
> - * @bh: struct buffer_head
> - *
> - * Returns zero on success and -EIO on error.
> - */
> -int bh_submit_read(struct buffer_head *bh)
> -{
> - BUG_ON(!buffer_locked(bh));
> -
> - if (buffer_uptodate(bh)) {
> - unlock_buffer(bh);
> - return 0;
> - }
> -
> - get_bh(bh);
> - bh->b_end_io = end_buffer_read_sync;
> - submit_bh(REQ_OP_READ, bh);
> - wait_on_buffer(bh);
> - if (buffer_uptodate(bh))
> - return 0;
> - return -EIO;
> -}
> -EXPORT_SYMBOL(bh_submit_read);
> -
>  void __init buffer_init(void)
>  {
>   unsigned long nrpages;
> diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h
> index 1c93ff8c8f51..576f3609ac4e 100644
> --- a/include/linux/buffer_head.h
> +++ b/include/linux/buffer_head.h
> @@ -230,7 +230,6 @@ int submit_bh(blk_opf_t, struct buffer_head *);
>  void write_boundary_block(struct block_device *bdev,
>   sector_t bblock, unsigned blocksize);
>  int bh_uptodate_or_lock(struct buffer_head *bh);
> -int bh_submit_read(struct buffer_head *bh);
>  int __bh_read(struct buffer_head *bh, blk_opf_t op_flags, bool wait);
>  void __bh_read_batch(struct buffer_head *bhs[],
>int nr, blk_opf_t op_flags, bool force_lock);
> -- 
> 2.31.1
> 
-- 
Jan Kara 
SUSE Labs, CR



Re: [Cluster-devel] [PATCH 13/14] ext2: replace bh_submit_read() helper with bh_read_locked()

2022-08-31 Thread Jan Kara
On Wed 31-08-22 15:21:10, Zhang Yi wrote:
> bh_submit_read() is almost the same as bh_read_locked(), switch to use
> bh_read_locked() in read_block_bitmap(), prepare remove the old one.
> 
> Signed-off-by: Zhang Yi 
...
> diff --git a/fs/ext2/balloc.c b/fs/ext2/balloc.c
> index c17ccc19b938..548a1d607f5a 100644
> --- a/fs/ext2/balloc.c
> +++ b/fs/ext2/balloc.c
> @@ -142,7 +142,7 @@ read_block_bitmap(struct super_block *sb, unsigned int 
> block_group)
>   if (likely(bh_uptodate_or_lock(bh)))
>   return bh;
>  
> - if (bh_submit_read(bh) < 0) {
> + if (bh_read_locked(bh, 0) < 0) {

I would just use bh_read() here and thus remove a bit more of the
boilerplate code (like the bh_uptodate_or_lock() checking).

Honza

>   brelse(bh);
>   ext2_error(sb, __func__,
>   "Cannot read block bitmap - "
> -- 
> 2.31.1
> 
-- 
Jan Kara 
SUSE Labs, CR



Re: [Cluster-devel] [PATCH 12/14] fs/buffer: remove ll_rw_block() helper

2022-08-31 Thread Jan Kara
On Wed 31-08-22 15:21:09, Zhang Yi wrote:
> Now that all ll_rw_block() users has been replaced to new safe helpers,
> we just remove it here.
> 
> Signed-off-by: Zhang Yi 

Looks good. Feel free to add:

Reviewed-by: Jan Kara 

Honza

> ---
>  fs/buffer.c | 63 +++--
>  include/linux/buffer_head.h |  1 -
>  2 files changed, 4 insertions(+), 60 deletions(-)
> 
> diff --git a/fs/buffer.c b/fs/buffer.c
> index e14adc638bfe..d1d09e2dacc2 100644
> --- a/fs/buffer.c
> +++ b/fs/buffer.c
> @@ -152,7 +152,7 @@ static void __end_buffer_read_notouch(struct buffer_head 
> *bh, int uptodate)
>  
>  /*
>   * Default synchronous end-of-IO handler..  Just mark it up-to-date and
> - * unlock the buffer. This is what ll_rw_block uses too.
> + * unlock the buffer.
>   */
>  void end_buffer_read_sync(struct buffer_head *bh, int uptodate)
>  {
> @@ -491,8 +491,8 @@ int inode_has_buffers(struct inode *inode)
>   * all already-submitted IO to complete, but does not queue any new
>   * writes to the disk.
>   *
> - * To do O_SYNC writes, just queue the buffer writes with ll_rw_block as
> - * you dirty the buffers, and then use osync_inode_buffers to wait for
> + * To do O_SYNC writes, just queue the buffer writes with write_dirty_buffer
> + * as you dirty the buffers, and then use osync_inode_buffers to wait for
>   * completion.  Any other dirty buffers which are not yet queued for
>   * write will not be flushed to disk by the osync.
>   */
> @@ -1807,7 +1807,7 @@ int __block_write_full_page(struct inode *inode, struct 
> page *page,
>   /*
>* The page was marked dirty, but the buffers were
>* clean.  Someone wrote them back by hand with
> -  * ll_rw_block/submit_bh.  A rare case.
> +  * write_dirty_buffer/submit_bh.  A rare case.
>*/
>   end_page_writeback(page);
>  
> @@ -2714,61 +2714,6 @@ int submit_bh(blk_opf_t opf, struct buffer_head *bh)
>  }
>  EXPORT_SYMBOL(submit_bh);
>  
> -/**
> - * ll_rw_block: low-level access to block devices (DEPRECATED)
> - * @opf: block layer request operation and flags.
> - * @nr: number of  buffer_heads in the array
> - * @bhs: array of pointers to  buffer_head
> - *
> - * ll_rw_block() takes an array of pointers to  buffer_heads, and
> - * requests an I/O operation on them, either a %REQ_OP_READ or a 
> %REQ_OP_WRITE.
> - * @opf contains flags modifying the detailed I/O behavior, most notably
> - * %REQ_RAHEAD.
> - *
> - * This function drops any buffer that it cannot get a lock on (with the
> - * BH_Lock state bit), any buffer that appears to be clean when doing a write
> - * request, and any buffer that appears to be up-to-date when doing read
> - * request.  Further it marks as clean buffers that are processed for
> - * writing (the buffer cache won't assume that they are actually clean
> - * until the buffer gets unlocked).
> - *
> - * ll_rw_block sets b_end_io to simple completion handler that marks
> - * the buffer up-to-date (if appropriate), unlocks the buffer and wakes
> - * any waiters. 
> - *
> - * All of the buffers must be for the same device, and must also be a
> - * multiple of the current approved size for the device.
> - */
> -void ll_rw_block(const blk_opf_t opf, int nr, struct buffer_head *bhs[])
> -{
> - const enum req_op op = opf & REQ_OP_MASK;
> - int i;
> -
> - for (i = 0; i < nr; i++) {
> - struct buffer_head *bh = bhs[i];
> -
> - if (!trylock_buffer(bh))
> - continue;
> - if (op == REQ_OP_WRITE) {
> - if (test_clear_buffer_dirty(bh)) {
> - bh->b_end_io = end_buffer_write_sync;
> - get_bh(bh);
> - submit_bh(opf, bh);
> - continue;
> - }
> - } else {
> - if (!buffer_uptodate(bh)) {
> - bh->b_end_io = end_buffer_read_sync;
> - get_bh(bh);
> - submit_bh(opf, bh);
> - continue;
> - }
> - }
> - unlock_buffer(bh);
> - }
> -}
> -EXPORT_SYMBOL(ll_rw_block);
> -
>  void write_dirty_buffer(struct buffer_head *bh, blk_opf_t op_flags)
>  {
>   lock_buffer(bh);
> diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h
> index 8a01c07c0418..1c93ff8c8f51 100644
> --- a/include/linux/buffer_head.h
> +++ b/include/linux/buffer_head.h
> @@ -223,7 +223,6 @@ struct buffer_head *alloc_buffer_head(gfp_t gfp_flags);
>  void free_buffer_head(struct buffer_head * bh);
>  void unlock_buffer(struct buffer_head *bh);
>  void __lock_buffer(struct buffer_head *bh);
> -void ll_rw_block(blk_opf_t, int, struct buffer_head * bh[]);
>  int sync_dirty_buffer(struct 

Re: [Cluster-devel] [PATCH 11/14] ufs: replace ll_rw_block()

2022-08-31 Thread Jan Kara
On Wed 31-08-22 15:21:08, Zhang Yi wrote:
> ll_rw_block() is not safe for the sync read path because it cannot
> guarantee that submitting read IO if the buffer has been locked. We
> could get false positive EIO after wait_on_buffer() if the buffer has
> been locked by others. So stop using ll_rw_block() in ufs.
> 
> Signed-off-by: Zhang Yi 

Looks good to me. Feel free to add:

Reviewed-by: Jan Kara 

Honza

> ---
>  fs/ufs/balloc.c | 4 +---
>  1 file changed, 1 insertion(+), 3 deletions(-)
> 
> diff --git a/fs/ufs/balloc.c b/fs/ufs/balloc.c
> index bd810d8239f2..bbc2159eece4 100644
> --- a/fs/ufs/balloc.c
> +++ b/fs/ufs/balloc.c
> @@ -296,9 +296,7 @@ static void ufs_change_blocknr(struct inode *inode, 
> sector_t beg,
>   if (!buffer_mapped(bh))
>   map_bh(bh, inode->i_sb, oldb + pos);
>   if (!buffer_uptodate(bh)) {
> - ll_rw_block(REQ_OP_READ, 1, &bh);
> - wait_on_buffer(bh);
> - if (!buffer_uptodate(bh)) {
> + if (bh_read(bh, 0)) {
>   ufs_error(inode->i_sb, __func__,
> "read of block failed\n");
>   break;
> -- 
> 2.31.1
> 
-- 
Jan Kara 
SUSE Labs, CR



Re: [Cluster-devel] [PATCH 10/14] udf: replace ll_rw_block()

2022-08-31 Thread Jan Kara
On Wed 31-08-22 15:21:07, Zhang Yi wrote:
> ll_rw_block() is not safe for the sync read path because it cannot
> guarantee that submitting read IO if the buffer has been locked. We
> could get false positive EIO after wait_on_buffer() if the buffer has
> been locked by others. So stop using ll_rw_block(). We also switch to
> new bh_readahead_batch() helper for the buffer array readahead path.
> 
> Signed-off-by: Zhang Yi 

Looks good to me. Feel free to add:

Reviewed-by: Jan Kara 

Honza

> ---
>  fs/udf/dir.c   | 2 +-
>  fs/udf/directory.c | 2 +-
>  fs/udf/inode.c | 5 +
>  3 files changed, 3 insertions(+), 6 deletions(-)
> 
> diff --git a/fs/udf/dir.c b/fs/udf/dir.c
> index cad3772f9dbe..15a98aa33aa8 100644
> --- a/fs/udf/dir.c
> +++ b/fs/udf/dir.c
> @@ -130,7 +130,7 @@ static int udf_readdir(struct file *file, struct 
> dir_context *ctx)
>   brelse(tmp);
>   }
>   if (num) {
> - ll_rw_block(REQ_OP_READ | REQ_RAHEAD, num, bha);
> + bh_readahead_batch(bha, num, REQ_RAHEAD);
>   for (i = 0; i < num; i++)
>   brelse(bha[i]);
>   }
> diff --git a/fs/udf/directory.c b/fs/udf/directory.c
> index a2adf6293093..469bc22d6bff 100644
> --- a/fs/udf/directory.c
> +++ b/fs/udf/directory.c
> @@ -89,7 +89,7 @@ struct fileIdentDesc *udf_fileident_read(struct inode *dir, 
> loff_t *nf_pos,
>   brelse(tmp);
>   }
>   if (num) {
> - ll_rw_block(REQ_OP_READ | REQ_RAHEAD, num, bha);
> + bh_readahead_batch(bha, num, REQ_RAHEAD);
>   for (i = 0; i < num; i++)
>   brelse(bha[i]);
>   }
> diff --git a/fs/udf/inode.c b/fs/udf/inode.c
> index 8d06daed549f..0971f09d20fc 100644
> --- a/fs/udf/inode.c
> +++ b/fs/udf/inode.c
> @@ -1214,10 +1214,7 @@ struct buffer_head *udf_bread(struct inode *inode, 
> udf_pblk_t block,
>   if (buffer_uptodate(bh))
>   return bh;
>  
> - ll_rw_block(REQ_OP_READ, 1, &bh);
> -
> - wait_on_buffer(bh);
> - if (buffer_uptodate(bh))
> + if (!bh_read(bh, 0))
>   return bh;
>  
>   brelse(bh);
> -- 
> 2.31.1
> 
-- 
Jan Kara 
SUSE Labs, CR



Re: [Cluster-devel] [PATCH 09/14] reiserfs: replace ll_rw_block()

2022-08-31 Thread Jan Kara
On Wed 31-08-22 15:21:06, Zhang Yi wrote:
> ll_rw_block() is not safe for the sync read/write path because it cannot
> guarantee that submitting read/write IO if the buffer has been locked.
> We could get false positive EIO after wait_on_buffer() in read path if
> the buffer has been locked by others. So stop using ll_rw_block() in
> reiserfs. We also switch to new bh_readahead_batch() helper for the
> buffer array readahead path.
> 
> Signed-off-by: Zhang Yi 

Looks good to me. Feel free to add:

Reviewed-by: Jan Kara 

Honza

> ---
>  fs/reiserfs/journal.c | 11 ++-
>  fs/reiserfs/stree.c   |  4 ++--
>  fs/reiserfs/super.c   |  4 +---
>  3 files changed, 9 insertions(+), 10 deletions(-)
> 
> diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c
> index 94addfcefede..699b1b8d5b73 100644
> --- a/fs/reiserfs/journal.c
> +++ b/fs/reiserfs/journal.c
> @@ -868,7 +868,7 @@ static int write_ordered_buffers(spinlock_t * lock,
>*/
>   if (buffer_dirty(bh) && unlikely(bh->b_page->mapping == NULL)) {
>   spin_unlock(lock);
> - ll_rw_block(REQ_OP_WRITE, 1, &bh);
> + write_dirty_buffer(bh, 0);
>   spin_lock(lock);
>   }
>   put_bh(bh);
> @@ -1054,7 +1054,7 @@ static int flush_commit_list(struct super_block *s,
>   if (tbh) {
>   if (buffer_dirty(tbh)) {
>   depth = reiserfs_write_unlock_nested(s);
> - ll_rw_block(REQ_OP_WRITE, 1, &tbh);
> + write_dirty_buffer(tbh, 0);
>   reiserfs_write_lock_nested(s, depth);
>   }
>   put_bh(tbh) ;
> @@ -2240,7 +2240,7 @@ static int journal_read_transaction(struct super_block 
> *sb,
>   }
>   }
>   /* read in the log blocks, memcpy to the corresponding real block */
> - ll_rw_block(REQ_OP_READ, get_desc_trans_len(desc), log_blocks);
> + bh_read_batch(log_blocks, get_desc_trans_len(desc));
>   for (i = 0; i < get_desc_trans_len(desc); i++) {
>  
>   wait_on_buffer(log_blocks[i]);
> @@ -2342,10 +2342,11 @@ static struct buffer_head *reiserfs_breada(struct 
> block_device *dev,
>   } else
>   bhlist[j++] = bh;
>   }
> - ll_rw_block(REQ_OP_READ, j, bhlist);
> + bh = bhlist[0];
> + bh_read_nowait(bh, 0);
> + bh_readahead_batch(&bhlist[1], j - 1, 0);
>   for (i = 1; i < j; i++)
>   brelse(bhlist[i]);
> - bh = bhlist[0];
>   wait_on_buffer(bh);
>   if (buffer_uptodate(bh))
>   return bh;
> diff --git a/fs/reiserfs/stree.c b/fs/reiserfs/stree.c
> index 9a293609a022..84c12a1947b2 100644
> --- a/fs/reiserfs/stree.c
> +++ b/fs/reiserfs/stree.c
> @@ -579,7 +579,7 @@ static int search_by_key_reada(struct super_block *s,
>   if (!buffer_uptodate(bh[j])) {
>   if (depth == -1)
>   depth = reiserfs_write_unlock_nested(s);
> - ll_rw_block(REQ_OP_READ | REQ_RAHEAD, 1, bh + j);
> + bh_readahead(bh[j], REQ_RAHEAD);
>   }
>   brelse(bh[j]);
>   }
> @@ -685,7 +685,7 @@ int search_by_key(struct super_block *sb, const struct 
> cpu_key *key,
>   if (!buffer_uptodate(bh) && depth == -1)
>   depth = reiserfs_write_unlock_nested(sb);
>  
> - ll_rw_block(REQ_OP_READ, 1, &bh);
> + bh_read_nowait(bh, 0);
>   wait_on_buffer(bh);
>  
>   if (depth != -1)
> diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c
> index c88cd2ce0665..8b1db82b6949 100644
> --- a/fs/reiserfs/super.c
> +++ b/fs/reiserfs/super.c
> @@ -1702,9 +1702,7 @@ static int read_super_block(struct super_block *s, int 
> offset)
>  /* after journal replay, reread all bitmap and super blocks */
>  static int reread_meta_blocks(struct super_block *s)
>  {
> - ll_rw_block(REQ_OP_READ, 1, &SB_BUFFER_WITH_SB(s));
> - wait_on_buffer(SB_BUFFER_WITH_SB(s));
> - if (!buffer_uptodate(SB_BUFFER_WITH_SB(s))) {
> + if (bh_read(SB_BUFFER_WITH_SB(s), 0)) {
>   reiserfs_warning(s, "reiserfs-2504", "error reading the super");
>   return 1;
>   }
> -- 
> 2.31.1
> 
-- 
Jan Kara 
SUSE Labs, CR



Re: [Cluster-devel] [PATCH 07/14] ntfs3: replace ll_rw_block()

2022-08-31 Thread Jan Kara
On Wed 31-08-22 15:21:04, Zhang Yi wrote:
> ll_rw_block() is not safe for the sync read path because it cannot
> guarantee that submitting read IO if the buffer has been locked. We
> could get false positive EIO after wait_on_buffer() if the buffer has
> been locked by others. So stop using ll_rw_block() in
> ntfs_get_block_vbo().
> 
> Signed-off-by: Zhang Yi 

Looks good to me. Feel free to add:

Reviewed-by: Jan Kara 

Honza

> ---
>  fs/ntfs3/inode.c | 7 ++-
>  1 file changed, 2 insertions(+), 5 deletions(-)
> 
> diff --git a/fs/ntfs3/inode.c b/fs/ntfs3/inode.c
> index 51363d4e8636..bbe7d4ea1750 100644
> --- a/fs/ntfs3/inode.c
> +++ b/fs/ntfs3/inode.c
> @@ -630,12 +630,9 @@ static noinline int ntfs_get_block_vbo(struct inode 
> *inode, u64 vbo,
>   bh->b_size = block_size;
>   off = vbo & (PAGE_SIZE - 1);
>   set_bh_page(bh, page, off);
> - ll_rw_block(REQ_OP_READ, 1, &bh);
> - wait_on_buffer(bh);
> - if (!buffer_uptodate(bh)) {
> - err = -EIO;
> + err = bh_read(bh, 0);
> + if (err)
>   goto out;
> - }
>   zero_user_segment(page, off + voff, off + block_size);
>   }
>   }
> -- 
> 2.31.1
> 
-- 
Jan Kara 
SUSE Labs, CR



Re: [Cluster-devel] [PATCH 06/14] jbd2: replace ll_rw_block()

2022-08-31 Thread Jan Kara
On Wed 31-08-22 15:21:03, Zhang Yi wrote:
> ll_rw_block() is not safe for the sync read path because it cannot
> guarantee that submitting read IO if the buffer has been locked. We
> could get false positive EIO after wait_on_buffer() if the buffer has
> been locked by others. So stop using ll_rw_block() in
> journal_get_superblock(). We also switch to new bh_readahead_batch()
> for the buffer array readahead path.
> 
> Signed-off-by: Zhang Yi 

Looks good to me. Feel free to add:

Reviewed-by: Jan Kara 

Honza

> ---
>  fs/jbd2/journal.c  |  7 +++
>  fs/jbd2/recovery.c | 16 ++--
>  2 files changed, 13 insertions(+), 10 deletions(-)
> 
> diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
> index 6350d3857c89..5a903aae6aad 100644
> --- a/fs/jbd2/journal.c
> +++ b/fs/jbd2/journal.c
> @@ -1893,15 +1893,14 @@ static int journal_get_superblock(journal_t *journal)
>  {
>   struct buffer_head *bh;
>   journal_superblock_t *sb;
> - int err = -EIO;
> + int err;
>  
>   bh = journal->j_sb_buffer;
>  
>   J_ASSERT(bh != NULL);
>   if (!buffer_uptodate(bh)) {
> - ll_rw_block(REQ_OP_READ, 1, &bh);
> - wait_on_buffer(bh);
> - if (!buffer_uptodate(bh)) {
> + err = bh_read(bh, 0);
> + if (err) {
>   printk(KERN_ERR
>   "JBD2: IO error reading journal superblock\n");
>   goto out;
> diff --git a/fs/jbd2/recovery.c b/fs/jbd2/recovery.c
> index f548479615c6..ee56a30b71cf 100644
> --- a/fs/jbd2/recovery.c
> +++ b/fs/jbd2/recovery.c
> @@ -100,7 +100,7 @@ static int do_readahead(journal_t *journal, unsigned int 
> start)
>   if (!buffer_uptodate(bh) && !buffer_locked(bh)) {
>   bufs[nbufs++] = bh;
>   if (nbufs == MAXBUF) {
> - ll_rw_block(REQ_OP_READ, nbufs, bufs);
> + bh_readahead_batch(bufs, nbufs, 0);
>   journal_brelse_array(bufs, nbufs);
>   nbufs = 0;
>   }
> @@ -109,7 +109,7 @@ static int do_readahead(journal_t *journal, unsigned int 
> start)
>   }
>  
>   if (nbufs)
> - ll_rw_block(REQ_OP_READ, nbufs, bufs);
> + bh_readahead_batch(bufs, nbufs, 0);
>   err = 0;
>  
>  failed:
> @@ -152,9 +152,14 @@ static int jread(struct buffer_head **bhp, journal_t 
> *journal,
>   return -ENOMEM;
>  
>   if (!buffer_uptodate(bh)) {
> - /* If this is a brand new buffer, start readahead.
> -   Otherwise, we assume we are already reading it.  */
> - if (!buffer_req(bh))
> + /*
> +  * If this is a brand new buffer, start readahead.
> +  * Otherwise, we assume we are already reading it.
> +  */
> + bool need_readahead = !buffer_req(bh);
> +
> + bh_read_nowait(bh, 0);
> + if (need_readahead)
>   do_readahead(journal, offset);
>   wait_on_buffer(bh);
>   }
> @@ -687,7 +692,6 @@ static int do_one_pass(journal_t *journal,
>   mark_buffer_dirty(nbh);
>   BUFFER_TRACE(nbh, "marking uptodate");
>   ++info->nr_replays;
> - /* ll_rw_block(WRITE, 1, &nbh); */
>   unlock_buffer(nbh);
>   brelse(obh);
>   brelse(nbh);
> -- 
> 2.31.1
> 
-- 
Jan Kara 
SUSE Labs, CR



Re: [Cluster-devel] [PATCH 05/14] isofs: replace ll_rw_block()

2022-08-31 Thread Jan Kara
On Wed 31-08-22 15:21:02, Zhang Yi wrote:
> ll_rw_block() is not safe for the sync read path because it cannot
> guarantee submitting read IO if the buffer has been locked. We
> could get a false positive EIO return from zisofs_uncompress_block() if
> the buffer has been locked by others. So stop using ll_rw_block() and
> switch to the sync helper instead.
> 
> Signed-off-by: Zhang Yi 

Looks good to me. Feel free to add:

Reviewed-by: Jan Kara 

Honza

> ---
>  fs/isofs/compress.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/fs/isofs/compress.c b/fs/isofs/compress.c
> index b466172eec25..ac35eddff29c 100644
> --- a/fs/isofs/compress.c
> +++ b/fs/isofs/compress.c
> @@ -82,7 +82,7 @@ static loff_t zisofs_uncompress_block(struct inode *inode, 
> loff_t block_start,
>   return 0;
>   }
>   haveblocks = isofs_get_blocks(inode, blocknum, bhs, needblocks);
> - ll_rw_block(REQ_OP_READ, haveblocks, bhs);
> + bh_read_batch(bhs, haveblocks);
>  
>   curbh = 0;
>   curpage = 0;
> -- 
> 2.31.1
> 
-- 
Jan Kara 
SUSE Labs, CR



Re: [Cluster-devel] [PATCH 04/14] gfs2: replace ll_rw_block()

2022-08-31 Thread Jan Kara
On Wed 31-08-22 15:21:01, Zhang Yi wrote:
> ll_rw_block() is not safe for the sync read path because it cannot
> guarantee that read IO is always submitted if the buffer has been locked,
> so stop using it. We also switch to the new bh_readahead() helper for the
> readahead path.
> 
> Signed-off-by: Zhang Yi 

Looks good to me. Feel free to add:

Reviewed-by: Jan Kara 

Honza

> ---
>  fs/gfs2/meta_io.c | 6 ++
>  fs/gfs2/quota.c   | 4 +---
>  2 files changed, 3 insertions(+), 7 deletions(-)
> 
> diff --git a/fs/gfs2/meta_io.c b/fs/gfs2/meta_io.c
> index 7e70e0ba5a6c..07e882aa7ebd 100644
> --- a/fs/gfs2/meta_io.c
> +++ b/fs/gfs2/meta_io.c
> @@ -525,8 +525,7 @@ struct buffer_head *gfs2_meta_ra(struct gfs2_glock *gl, 
> u64 dblock, u32 extlen)
>  
>   if (buffer_uptodate(first_bh))
>   goto out;
> - if (!buffer_locked(first_bh))
> - ll_rw_block(REQ_OP_READ | REQ_META | REQ_PRIO, 1, &first_bh);
> + bh_read_nowait(first_bh, REQ_META | REQ_PRIO);
>  
>   dblock++;
>   extlen--;
> @@ -535,8 +534,7 @@ struct buffer_head *gfs2_meta_ra(struct gfs2_glock *gl, 
> u64 dblock, u32 extlen)
>   bh = gfs2_getbuf(gl, dblock, CREATE);
>  
>   if (!buffer_uptodate(bh) && !buffer_locked(bh))
> - ll_rw_block(REQ_OP_READ | REQ_RAHEAD | REQ_META |
> - REQ_PRIO, 1, &bh);
> + bh_readahead(bh, REQ_RAHEAD | REQ_META | REQ_PRIO);
>   brelse(bh);
>   dblock++;
>   extlen--;
> diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c
> index f201eaf59d0d..0c2ef4226aba 100644
> --- a/fs/gfs2/quota.c
> +++ b/fs/gfs2/quota.c
> @@ -746,9 +746,7 @@ static int gfs2_write_buf_to_page(struct gfs2_inode *ip, 
> unsigned long index,
>   if (PageUptodate(page))
>   set_buffer_uptodate(bh);
>   if (!buffer_uptodate(bh)) {
> - ll_rw_block(REQ_OP_READ | REQ_META | REQ_PRIO, 1, &bh);
> - wait_on_buffer(bh);
> - if (!buffer_uptodate(bh))
> + if (bh_read(bh, REQ_META | REQ_PRIO))
>   goto unlock_out;
>   }
>   if (gfs2_is_jdata(ip))
> -- 
> 2.31.1
> 
-- 
Jan Kara 
SUSE Labs, CR



Re: [Cluster-devel] [PATCH 03/14] fs/buffer: replace ll_rw_block()

2022-08-31 Thread Jan Kara
On Wed 31-08-22 15:21:00, Zhang Yi wrote:
> ll_rw_block() is not safe for the sync IO path because it skip buffers
> which has been locked by others, it could lead to false positive EIO
> when submitting read IO. So stop using ll_rw_block(), switch to use new
> helpers which could guarantee buffer locked and submit IO if needed.
> 
> Signed-off-by: Zhang Yi 
> ---
>  fs/buffer.c | 13 ++---
>  1 file changed, 6 insertions(+), 7 deletions(-)
> 
> diff --git a/fs/buffer.c b/fs/buffer.c
> index a663191903ed..e14adc638bfe 100644
> --- a/fs/buffer.c
> +++ b/fs/buffer.c
...
> @@ -1342,7 +1342,8 @@ void __breadahead(struct block_device *bdev, sector_t 
> block, unsigned size)
>  {
>   struct buffer_head *bh = __getblk(bdev, block, size);
>   if (likely(bh)) {
> - ll_rw_block(REQ_OP_READ | REQ_RAHEAD, 1, &bh);
> + if (trylock_buffer(bh))
> + __bh_read(bh, REQ_RAHEAD, false);

I suppose this can be bh_readahead()?

>   brelse(bh);
>   }
>  }

Otherwise the patch looks good.

Honza
-- 
Jan Kara 
SUSE Labs, CR



Re: [Cluster-devel] [PATCH 01/14] fs/buffer: remove __breadahead_gfp()

2022-08-31 Thread Jan Kara
On Wed 31-08-22 15:20:58, Zhang Yi wrote:
> No one use __breadahead_gfp() and sb_breadahead_unmovable() any more,
> remove them.
> 
> Signed-off-by: Zhang Yi 

Looks good. Feel free to add:

Reviewed-by: Jan Kara 

Honza

> ---
>  fs/buffer.c | 11 ---
>  include/linux/buffer_head.h |  8 
>  2 files changed, 19 deletions(-)
> 
> diff --git a/fs/buffer.c b/fs/buffer.c
> index 55e762a58eb6..a0b70b3239f3 100644
> --- a/fs/buffer.c
> +++ b/fs/buffer.c
> @@ -1348,17 +1348,6 @@ void __breadahead(struct block_device *bdev, sector_t 
> block, unsigned size)
>  }
>  EXPORT_SYMBOL(__breadahead);
>  
> -void __breadahead_gfp(struct block_device *bdev, sector_t block, unsigned 
> size,
> -   gfp_t gfp)
> -{
> - struct buffer_head *bh = __getblk_gfp(bdev, block, size, gfp);
> - if (likely(bh)) {
> - ll_rw_block(REQ_OP_READ | REQ_RAHEAD, 1, &bh);
> - brelse(bh);
> - }
> -}
> -EXPORT_SYMBOL(__breadahead_gfp);
> -
>  /**
>   *  __bread_gfp() - reads a specified block and returns the bh
>   *  @bdev: the block_device to read from
> diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h
> index 089c9ade4325..c3863c417b00 100644
> --- a/include/linux/buffer_head.h
> +++ b/include/linux/buffer_head.h
> @@ -214,8 +214,6 @@ struct buffer_head *__getblk_gfp(struct block_device 
> *bdev, sector_t block,
>  void __brelse(struct buffer_head *);
>  void __bforget(struct buffer_head *);
>  void __breadahead(struct block_device *, sector_t block, unsigned int size);
> -void __breadahead_gfp(struct block_device *, sector_t block, unsigned int 
> size,
> -   gfp_t gfp);
>  struct buffer_head *__bread_gfp(struct block_device *,
>   sector_t block, unsigned size, gfp_t gfp);
>  void invalidate_bh_lrus(void);
> @@ -340,12 +338,6 @@ sb_breadahead(struct super_block *sb, sector_t block)
>   __breadahead(sb->s_bdev, block, sb->s_blocksize);
>  }
>  
> -static inline void
> -sb_breadahead_unmovable(struct super_block *sb, sector_t block)
> -{
> - __breadahead_gfp(sb->s_bdev, block, sb->s_blocksize, 0);
> -}
> -
>  static inline struct buffer_head *
>  sb_getblk(struct super_block *sb, sector_t block)
>  {
> -- 
> 2.31.1
> 
-- 
Jan Kara 
SUSE Labs, CR