----- Original Message -----
> From: "Andreas Gruenbacher" <agrue...@redhat.com>
> To: "Abhi Das" <a...@redhat.com>
> Cc: "cluster-devel" <cluster-devel@redhat.com>
> Sent: Friday, September 7, 2018 7:14:29 AM
> Subject: Re: [Cluster-devel] [GFS2 PATCH 4/4] gfs2: read journal in large
> chunks to locate the head
>
> Abhi,
>
> On 6 September 2018 at 19:02, Abhi Das <a...@redhat.com> wrote:
> > Use bio(s) to read in the journal sequentially in large chunks and
> > locate the head of the journal.
> > This is faster in most cases when compared to the existing bisect
> > method which operates one block at a time.
> >
> > Signed-off-by: Abhi Das <a...@redhat.com>
> > ---
> > fs/gfs2/incore.h | 8 +++-
> > fs/gfs2/lops.c | 122
> > +++++++++++++++++++++++++++++++++++++++++++++------
> > fs/gfs2/lops.h | 1 +
> > fs/gfs2/ops_fstype.c | 1 +
> > fs/gfs2/recovery.c | 115
> > +++++-------------------------------------------
> > 5 files changed, 129 insertions(+), 118 deletions(-)
> >
> > diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
> > index b96d39c..b24c105 100644
> > --- a/fs/gfs2/incore.h
> > +++ b/fs/gfs2/incore.h
> > @@ -529,6 +529,11 @@ struct gfs2_journal_extent {
> > u64 blocks;
> > };
> >
> > +enum {
> > + JDF_RECOVERY = 1,
> > + JDF_JHEAD = 2,
> > +};
> > +
> > struct gfs2_jdesc {
> > struct list_head jd_list;
> > struct list_head extent_list;
> > @@ -536,12 +541,13 @@ struct gfs2_jdesc {
> > struct work_struct jd_work;
> > struct inode *jd_inode;
> > unsigned long jd_flags;
> > -#define JDF_RECOVERY 1
> > unsigned int jd_jid;
> > unsigned int jd_blocks;
> > int jd_recover_error;
> > /* Replay stuff */
> >
> > + struct gfs2_log_header_host jd_jhead;
> > + struct bio *jd_rd_bio; /* bio used for reading this journal */
> > unsigned int jd_found_blocks;
> > unsigned int jd_found_revokes;
> > unsigned int jd_replayed_blocks;
> > diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c
> > index 4cc19af..21979b2 100644
> > --- a/fs/gfs2/lops.c
> > +++ b/fs/gfs2/lops.c
> > @@ -18,6 +18,7 @@
> > #include <linux/fs.h>
> > #include <linux/list_sort.h>
> >
> > +#include "bmap.h"
> > #include "dir.h"
> > #include "gfs2.h"
> > #include "incore.h"
> > @@ -227,6 +228,50 @@ static void gfs2_end_log_write(struct bio *bio)
> > wake_up(&sdp->sd_log_flush_wait);
> > }
> >
> > +static void gfs2_end_log_read(struct bio *bio)
> > +{
> > + struct gfs2_jdesc *jd = bio->bi_private;
> > + struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);
> > + struct page *page;
> > + struct bio_vec *bvec;
> > + int i, last;
> > +
> > + if (bio->bi_status) {
> > + fs_err(sdp, "Error %d reading from journal, jid=%u\n",
> > + bio->bi_status, jd->jd_jid);
> > + }
> > +
> > + bio_for_each_segment_all(bvec, bio, i) {
> > + struct gfs2_log_header_host uninitialized_var(lh);
> > + void *ptr;
> > +
> > + page = bvec->bv_page;
> > + ptr = page_address(page);
> > + last = page_private(page);
> > +
> > + if (!test_bit(JDF_JHEAD, &jd->jd_flags)) {
> > + mempool_free(page, gfs2_page_pool);
> > + continue;
> > + }
> > +
> > + if (!__get_log_header(sdp, ptr, 0, &lh)) {
> > + if (lh.lh_sequence > jd->jd_jhead.lh_sequence)
> > + jd->jd_jhead = lh;
> > + else
> > + goto found;
> > + }
> > +
> > + if (last) {
> > + found:
> > + clear_bit(JDF_JHEAD, &jd->jd_flags);
> > + wake_up_bit(&jd->jd_flags, JDF_JHEAD);
> > + }
> > + mempool_free(page, gfs2_page_pool);
> > + }
> > +
> > + bio_put(bio);
> > +}
> > +
> > /**
> > * gfs2_log_flush_bio - Submit any pending log bio
> > * @biop: Address of the bio pointer
> > @@ -241,8 +286,10 @@ void gfs2_log_flush_bio(struct bio **biop, int op, int
> > op_flags)
> > {
> > struct bio *bio = *biop;
> > if (bio) {
> > - struct gfs2_sbd *sdp = bio->bi_private;
> > - atomic_inc(&sdp->sd_log_in_flight);
> > + if (op != REQ_OP_READ) {
> > + struct gfs2_sbd *sdp = bio->bi_private;
> > + atomic_inc(&sdp->sd_log_in_flight);
> > + }
> > bio_set_op_attrs(bio, op, op_flags);
> > submit_bio(bio);
> > *biop = NULL;
> > @@ -253,6 +300,7 @@ void gfs2_log_flush_bio(struct bio **biop, int op, int
> > op_flags)
> > * gfs2_log_alloc_bio - Allocate a new bio for log writing
> > * @jd: The journal descriptor
> > * @blkno: The next device block number we want to write to
> > + * @op: REQ_OP
> > *
> > * This should never be called when there is a cached bio in the
> > * super block. When it returns, there will be a cached bio in the
> > @@ -262,21 +310,24 @@ void gfs2_log_flush_bio(struct bio **biop, int op,
> > int op_flags)
> > * Returns: Newly allocated bio
> > */
> >
> > -static struct bio *gfs2_log_alloc_bio(struct gfs2_jdesc *jd, u64 blkno)
> > +static struct bio *gfs2_log_alloc_bio(struct gfs2_jdesc *jd, u64 blkno,
> > int op)
> > {
> > struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);
> > struct super_block *sb = sdp->sd_vfs;
> > struct bio *bio;
> >
> > - BUG_ON(sdp->sd_log_bio);
> > + BUG_ON((op == REQ_OP_READ ? jd->jd_rd_bio : sdp->sd_log_bio));
> >
> > bio = bio_alloc(GFP_NOIO, BIO_MAX_PAGES);
> > bio->bi_iter.bi_sector = blkno * (sb->s_blocksize >> 9);
> > bio_set_dev(bio, sb->s_bdev);
> > - bio->bi_end_io = gfs2_end_log_write;
> > - bio->bi_private = sdp;
> > + bio->bi_end_io = op == REQ_OP_READ ? gfs2_end_log_read :
> > gfs2_end_log_write;
> > + bio->bi_private = op == REQ_OP_READ ? (void*)jd : (void*)sdp;
> >
> > - sdp->sd_log_bio = bio;
> > + if (op == REQ_OP_READ)
> > + jd->jd_rd_bio = bio;
> > + else
> > + sdp->sd_log_bio = bio;
> >
> > return bio;
> > }
> > @@ -285,6 +336,7 @@ static struct bio *gfs2_log_alloc_bio(struct gfs2_jdesc
> > *jd, u64 blkno)
> > * gfs2_log_get_bio - Get cached log bio, or allocate a new one
> > * @jd: The journal descriptor
> > * @blkno: The device block number we want to write to
> > + * @op: REQ_OP
> > *
> > * If there is a cached bio, then if the next block number is sequential
> > * with the previous one, return it, otherwise flush the bio to the
> > @@ -294,10 +346,10 @@ static struct bio *gfs2_log_alloc_bio(struct
> > gfs2_jdesc *jd, u64 blkno)
> > * Returns: The bio to use for log writes
> > */
> >
> > -static struct bio *gfs2_log_get_bio(struct gfs2_jdesc *jd, u64 blkno)
> > +static struct bio *gfs2_log_get_bio(struct gfs2_jdesc *jd, u64 blkno, int
> > op)
> > {
> > struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);
> > - struct bio *bio = sdp->sd_log_bio;
> > + struct bio *bio = op == REQ_OP_READ ? jd->jd_rd_bio :
> > sdp->sd_log_bio;
> > u64 nblk;
> >
> > if (bio) {
> > @@ -305,10 +357,12 @@ static struct bio *gfs2_log_get_bio(struct gfs2_jdesc
> > *jd, u64 blkno)
> > nblk >>= sdp->sd_fsb2bb_shift;
> > if (blkno == nblk)
> > return bio;
> > - gfs2_log_flush_bio(&sdp->sd_log_bio, REQ_OP_WRITE, 0);
> > + gfs2_log_flush_bio(op == REQ_OP_READ ? &jd->jd_rd_bio
> > + : &sdp->sd_log_bio, REQ_OP_WRITE, 0);
>
> Shouldn't it be "op" here instead of "REQ_OP_WRITE"?
>
Yep, you're right! Good catch. I'll repost this patch along with Bob's
suggestions as well.
Cheers!
--Abhi