[Cluster-devel] [gfs2 patch] gfs2: fix use-after-free on transaction ail lists
Hi, Before this patch, transactions could be merged into the system transaction by function gfs2_merge_trans(), but then it is not considered attached. The caller, log_refund, only sets TR_ATTACHED if the transaction is not merged. Later, in function, gfs2_trans_end, if TR_ATTACHED is not set (because it was merged) the transaction is freed by gfs2_trans_free(). However, by the time this happens, the log flush mechanism may have queued bd elements to the ail list, which means tr_ail1_list may not be empty, and the flush may forget about the bd elements altogether (memory leak) or worse, reference them after the transaction has been freed. This patch adds logic into gfs2_merge_trans() to move the merged transaction's ail lists to the sdp transaction. This prevents the use-after-free. To facilitate this, we pass sdp into the function instead of the transaction itself. Signed-off-by: Bob Peterson --- fs/gfs2/log.c | 11 +-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c index 79aa0fb4984e..91e1eebc5ee0 100644 --- a/fs/gfs2/log.c +++ b/fs/gfs2/log.c @@ -1017,8 +1017,10 @@ void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl, u32 flags) * @new: New transaction to be merged */ -static void gfs2_merge_trans(struct gfs2_trans *old, struct gfs2_trans *new) +static void gfs2_merge_trans(struct gfs2_sbd *sdp, struct gfs2_trans *new) { + struct gfs2_trans *old = sdp->sd_log_tr; + WARN_ON_ONCE(!test_bit(TR_ATTACHED, &old->tr_flags)); old->tr_num_buf_new += new->tr_num_buf_new; @@ -1030,6 +1032,11 @@ static void gfs2_merge_trans(struct gfs2_trans *old, struct gfs2_trans *new) list_splice_tail_init(&new->tr_databuf, &old->tr_databuf); list_splice_tail_init(&new->tr_buf, &old->tr_buf); + + spin_lock(&sdp->sd_ail_lock); + list_splice_tail_init(&new->tr_ail1_list, &old->tr_ail1_list); + list_splice_tail_init(&new->tr_ail2_list, &old->tr_ail2_list); + spin_unlock(&sdp->sd_ail_lock); } static void log_refund(struct gfs2_sbd *sdp, struct gfs2_trans *tr) @@ -1041,7 +1048,7 @@ static void log_refund(struct gfs2_sbd *sdp, struct gfs2_trans *tr) gfs2_log_lock(sdp); if (sdp->sd_log_tr) { - gfs2_merge_trans(sdp->sd_log_tr, tr); + gfs2_merge_trans(sdp, tr); } else if (tr->tr_num_buf_new || tr->tr_num_databuf_new) { gfs2_assert_withdraw(sdp, test_bit(TR_ALLOCED, &tr->tr_flags)); sdp->sd_log_tr = tr;
[Cluster-devel] [PATCH AUTOSEL 5.6 12/17] gfs2: Even more gfs2_find_jhead fixes
From: Andreas Gruenbacher [ Upstream commit 20be493b787cd581c9fffad7fcd6bfbe6af1050c ] Fix several issues in the previous gfs2_find_jhead fix: * When updating @blocks_submitted, @block refers to the first block block not submitted yet, not the last block submitted, so fix an off-by-one error. * We want to ensure that @blocks_submitted is far enough ahead of @blocks_read to guarantee that there is in-flight I/O. Otherwise, we'll eventually end up waiting for pages that haven't been submitted, yet. * It's much easier to compare the number of blocks added with the number of blocks submitted to limit the maximum bio size. * Even with bio chaining, we can keep adding blocks until we reach the maximum bio size, as long as we stop at a page boundary. This simplifies the logic. Signed-off-by: Andreas Gruenbacher Reviewed-by: Bob Peterson Signed-off-by: Sasha Levin --- fs/gfs2/lops.c | 15 +-- 1 file changed, 5 insertions(+), 10 deletions(-) diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c index 3a020bdc358c..966ed37c9acd 100644 --- a/fs/gfs2/lops.c +++ b/fs/gfs2/lops.c @@ -505,12 +505,12 @@ int gfs2_find_jhead(struct gfs2_jdesc *jd, struct gfs2_log_header_host *head, unsigned int bsize = sdp->sd_sb.sb_bsize, off; unsigned int bsize_shift = sdp->sd_sb.sb_bsize_shift; unsigned int shift = PAGE_SHIFT - bsize_shift; - unsigned int max_bio_size = 2 * 1024 * 1024; + unsigned int max_blocks = 2 * 1024 * 1024 >> bsize_shift; struct gfs2_journal_extent *je; int sz, ret = 0; struct bio *bio = NULL; struct page *page = NULL; - bool bio_chained = false, done = false; + bool done = false; errseq_t since; memset(head, 0, sizeof(*head)); @@ -533,10 +533,7 @@ int gfs2_find_jhead(struct gfs2_jdesc *jd, struct gfs2_log_header_host *head, off = 0; } - if (!bio || (bio_chained && !off) || - bio->bi_iter.bi_size >= max_bio_size) { - /* start new bio */ - } else { + if (bio && (off || block < blocks_submitted + max_blocks)) { sector_t sector = dblock << sdp->sd_fsb2bb_shift; if (bio_end_sector(bio) == sector) { @@ -549,19 +546,17 @@ int gfs2_find_jhead(struct gfs2_jdesc *jd, struct gfs2_log_header_host *head, (PAGE_SIZE - off) >> bsize_shift; bio = gfs2_chain_bio(bio, blocks); - bio_chained = true; goto add_block_to_new_bio; } } if (bio) { - blocks_submitted = block + 1; + blocks_submitted = block; submit_bio(bio); } bio = gfs2_log_alloc_bio(sdp, dblock, gfs2_end_log_read); bio->bi_opf = REQ_OP_READ; - bio_chained = false; add_block_to_new_bio: sz = bio_add_page(bio, page, bsize, off); BUG_ON(sz != bsize); @@ -569,7 +564,7 @@ int gfs2_find_jhead(struct gfs2_jdesc *jd, struct gfs2_log_header_host *head, off += bsize; if (off == PAGE_SIZE) page = NULL; - if (blocks_submitted < 2 * max_bio_size >> bsize_shift) { + if (blocks_submitted <= blocks_read + max_blocks) { /* Keep at least one bio in flight */ continue; } -- 2.25.1
[Cluster-devel] [PATCH AUTOSEL 5.4 10/14] gfs2: Even more gfs2_find_jhead fixes
From: Andreas Gruenbacher [ Upstream commit 20be493b787cd581c9fffad7fcd6bfbe6af1050c ] Fix several issues in the previous gfs2_find_jhead fix: * When updating @blocks_submitted, @block refers to the first block block not submitted yet, not the last block submitted, so fix an off-by-one error. * We want to ensure that @blocks_submitted is far enough ahead of @blocks_read to guarantee that there is in-flight I/O. Otherwise, we'll eventually end up waiting for pages that haven't been submitted, yet. * It's much easier to compare the number of blocks added with the number of blocks submitted to limit the maximum bio size. * Even with bio chaining, we can keep adding blocks until we reach the maximum bio size, as long as we stop at a page boundary. This simplifies the logic. Signed-off-by: Andreas Gruenbacher Reviewed-by: Bob Peterson Signed-off-by: Sasha Levin --- fs/gfs2/lops.c | 15 +-- 1 file changed, 5 insertions(+), 10 deletions(-) diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c index 8303b44a5068..d2ed4dc4434c 100644 --- a/fs/gfs2/lops.c +++ b/fs/gfs2/lops.c @@ -504,12 +504,12 @@ int gfs2_find_jhead(struct gfs2_jdesc *jd, struct gfs2_log_header_host *head, unsigned int bsize = sdp->sd_sb.sb_bsize, off; unsigned int bsize_shift = sdp->sd_sb.sb_bsize_shift; unsigned int shift = PAGE_SHIFT - bsize_shift; - unsigned int max_bio_size = 2 * 1024 * 1024; + unsigned int max_blocks = 2 * 1024 * 1024 >> bsize_shift; struct gfs2_journal_extent *je; int sz, ret = 0; struct bio *bio = NULL; struct page *page = NULL; - bool bio_chained = false, done = false; + bool done = false; errseq_t since; memset(head, 0, sizeof(*head)); @@ -532,10 +532,7 @@ int gfs2_find_jhead(struct gfs2_jdesc *jd, struct gfs2_log_header_host *head, off = 0; } - if (!bio || (bio_chained && !off) || - bio->bi_iter.bi_size >= max_bio_size) { - /* start new bio */ - } else { + if (bio && (off || block < blocks_submitted + max_blocks)) { sector_t sector = dblock << sdp->sd_fsb2bb_shift; if (bio_end_sector(bio) == sector) { @@ -548,19 +545,17 @@ int gfs2_find_jhead(struct gfs2_jdesc *jd, struct gfs2_log_header_host *head, (PAGE_SIZE - off) >> bsize_shift; bio = gfs2_chain_bio(bio, blocks); - bio_chained = true; goto add_block_to_new_bio; } } if (bio) { - blocks_submitted = block + 1; + blocks_submitted = block; submit_bio(bio); } bio = gfs2_log_alloc_bio(sdp, dblock, gfs2_end_log_read); bio->bi_opf = REQ_OP_READ; - bio_chained = false; add_block_to_new_bio: sz = bio_add_page(bio, page, bsize, off); BUG_ON(sz != bsize); @@ -568,7 +563,7 @@ int gfs2_find_jhead(struct gfs2_jdesc *jd, struct gfs2_log_header_host *head, off += bsize; if (off == PAGE_SIZE) page = NULL; - if (blocks_submitted < 2 * max_bio_size >> bsize_shift) { + if (blocks_submitted <= blocks_read + max_blocks) { /* Keep at least one bio in flight */ continue; } -- 2.25.1
[Cluster-devel] [GFS2 PATCH v2] gfs2: new slab for transactions
Hi, I've posted similar patches for this in the past, but they all had shortcomings. This is my latest version. There is a follow-on patch which I will post shortly. In the future, I want to add a new slab initialization function, but for now this is enough. Note that the use-after-free warnings in gfs2_trans_free() require that we now properly initialize the transaction ail lists. Bob Peterson ... This patch adds a new slab for gfs2 transactions. That allows us to have an initialization function and protect against some errors. Signed-off-by: Bob Peterson --- fs/gfs2/glops.c | 2 ++ fs/gfs2/log.c | 9 + fs/gfs2/main.c | 9 + fs/gfs2/trans.c | 22 ++ fs/gfs2/trans.h | 1 + fs/gfs2/util.c | 1 + fs/gfs2/util.h | 1 + 7 files changed, 37 insertions(+), 8 deletions(-) diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c index 9e9c7a4b8c66..0e4e6570adef 100644 --- a/fs/gfs2/glops.c +++ b/fs/gfs2/glops.c @@ -91,6 +91,8 @@ static int gfs2_ail_empty_gl(struct gfs2_glock *gl) memset(&tr, 0, sizeof(tr)); INIT_LIST_HEAD(&tr.tr_buf); INIT_LIST_HEAD(&tr.tr_databuf); + INIT_LIST_HEAD(&tr.tr_ail1_list); + INIT_LIST_HEAD(&tr.tr_ail2_list); tr.tr_revokes = atomic_read(&gl->gl_ail_count); if (!tr.tr_revokes) { diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c index d6e5734651a4..79aa0fb4984e 100644 --- a/fs/gfs2/log.c +++ b/fs/gfs2/log.c @@ -30,6 +30,7 @@ #include "util.h" #include "dir.h" #include "trace_gfs2.h" +#include "trans.h" static void gfs2_log_shutdown(struct gfs2_sbd *sdp); @@ -379,7 +380,7 @@ static void ail2_empty(struct gfs2_sbd *sdp, unsigned int new_tail) list_del(&tr->tr_list); gfs2_assert_warn(sdp, list_empty(&tr->tr_ail1_list)); gfs2_assert_warn(sdp, list_empty(&tr->tr_ail2_list)); - kfree(tr); + gfs2_trans_free(sdp, tr); } spin_unlock(&sdp->sd_ail_lock); @@ -868,14 +869,14 @@ static void ail_drain(struct gfs2_sbd *sdp) gfs2_ail_empty_tr(sdp, tr, &tr->tr_ail1_list); gfs2_ail_empty_tr(sdp, tr, &tr->tr_ail2_list); list_del(&tr->tr_list); - kfree(tr); + gfs2_trans_free(sdp, tr); } while (!list_empty(&sdp->sd_ail2_list)) { tr = list_first_entry(&sdp->sd_ail2_list, struct gfs2_trans, tr_list); gfs2_ail_empty_tr(sdp, tr, &tr->tr_ail2_list); list_del(&tr->tr_list); - kfree(tr); + gfs2_trans_free(sdp, tr); } spin_unlock(&sdp->sd_ail_lock); } @@ -1007,7 +1008,7 @@ void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl, u32 flags) trace_gfs2_log_flush(sdp, 0, flags); up_write(&sdp->sd_log_flush_lock); - kfree(tr); + gfs2_trans_free(sdp, tr); } /** diff --git a/fs/gfs2/main.c b/fs/gfs2/main.c index a1a295b739fb..733470ca6be9 100644 --- a/fs/gfs2/main.c +++ b/fs/gfs2/main.c @@ -143,6 +143,12 @@ static int __init init_gfs2_fs(void) if (!gfs2_qadata_cachep) goto fail_cachep7; + gfs2_trans_cachep = kmem_cache_create("gfs2_trans", + sizeof(struct gfs2_trans), + 0, 0, NULL); + if (!gfs2_trans_cachep) + goto fail_cachep8; + error = register_shrinker(&gfs2_qd_shrinker); if (error) goto fail_shrinker; @@ -194,6 +200,8 @@ static int __init init_gfs2_fs(void) fail_fs1: unregister_shrinker(&gfs2_qd_shrinker); fail_shrinker: + kmem_cache_destroy(gfs2_trans_cachep); +fail_cachep8: kmem_cache_destroy(gfs2_qadata_cachep); fail_cachep7: kmem_cache_destroy(gfs2_quotad_cachep); @@ -236,6 +244,7 @@ static void __exit exit_gfs2_fs(void) rcu_barrier(); mempool_destroy(gfs2_page_pool); + kmem_cache_destroy(gfs2_trans_cachep); kmem_cache_destroy(gfs2_qadata_cachep); kmem_cache_destroy(gfs2_quotad_cachep); kmem_cache_destroy(gfs2_rgrpd_cachep); diff --git a/fs/gfs2/trans.c b/fs/gfs2/trans.c index 9aed58f7368d..348ecd3fc167 100644 --- a/fs/gfs2/trans.c +++ b/fs/gfs2/trans.c @@ -37,7 +37,7 @@ int gfs2_trans_begin(struct gfs2_sbd *sdp, unsigned int blocks, if (!test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags)) return -EROFS; - tr = kzalloc(sizeof(struct gfs2_trans), GFP_NOFS); + tr = kmem_cache_zalloc(gfs2_trans_cachep, GFP_NOFS); if (!tr) return -ENOMEM; @@ -53,6 +53,9 @@ int gfs2_trans_begin(struct gfs2_sbd *sdp, unsigned int blocks, INIT_LIST_HEAD(&tr->tr_databuf); INIT_LIST_HEAD(&tr->tr_buf); + INIT_LIST_HEAD(&tr->tr_ail1_list); + INIT_LIST_HEAD(&tr->tr_ail2_list); + sb_start_intwrite(sdp->sd_vfs); error = gfs2_log_reserve(s
[Cluster-devel] [GFS2 PATCH 3/6] gfs2: instrumentation wrt log_flush stuck
This adds checks for gfs2_log_flush being stuck, similarly to the check in gfs2_ail1_flush. To faciliate this and make the strings easy to grep we move the ail1 emptying to its own function, empty_ail1_list. Signed-off-by: Bob Peterson --- fs/gfs2/log.c | 34 +- 1 file changed, 25 insertions(+), 9 deletions(-) diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c index 0644e58c6191..fcc7f58d74f0 100644 --- a/fs/gfs2/log.c +++ b/fs/gfs2/log.c @@ -145,9 +145,6 @@ static void dump_ail_list(struct gfs2_sbd *sdp) struct gfs2_bufdata *bd; struct buffer_head *bh; - fs_err(sdp, "Error: In gfs2_ail1_flush for ten minutes! t=%d\n", - current->journal_info ? 1 : 0); - list_for_each_entry_reverse(tr, &sdp->sd_ail1_list, tr_list) { list_for_each_entry_reverse(bd, &tr->tr_ail1_list, bd_ail_st_list) { @@ -197,6 +194,8 @@ void gfs2_ail1_flush(struct gfs2_sbd *sdp, struct writeback_control *wbc) restart: ret = 0; if (time_after(jiffies, flush_start + (HZ * 600))) { + fs_err(sdp, "Error: In %s for ten minutes! t=%d\n", + __func__, current->journal_info ? 1 : 0); dump_ail_list(sdp); goto out; } @@ -876,6 +875,28 @@ static void ail_drain(struct gfs2_sbd *sdp) spin_unlock(&sdp->sd_ail_lock); } +/** + * empty_ail1_list - try to start IO and empty the ail1 list + * @sdp: Pointer to GFS2 superblock + */ +static void empty_ail1_list(struct gfs2_sbd *sdp) +{ + unsigned long start = jiffies; + + for (;;) { + if (time_after(jiffies, start + (HZ * 600))) { + fs_err(sdp, "Error: In %s for 10 minutes! t=%d\n", + __func__, current->journal_info ? 1 : 0); + dump_ail_list(sdp); + return; + } + gfs2_ail1_start(sdp); + gfs2_ail1_wait(sdp); + if (gfs2_ail1_empty(sdp, 0)) + return; + } +} + /** * gfs2_log_flush - flush incore transaction(s) * @sdp: the filesystem @@ -965,12 +986,7 @@ void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl, u32 flags) if (!(flags & GFS2_LOG_HEAD_FLUSH_NORMAL)) { if (!sdp->sd_log_idle) { - for (;;) { - gfs2_ail1_start(sdp); - gfs2_ail1_wait(sdp); - if (gfs2_ail1_empty(sdp, 0)) - break; - } + empty_ail1_list(sdp); if (gfs2_withdrawn(sdp)) goto out; atomic_dec(&sdp->sd_log_blks_free); /* Adjust for unreserved buffer */ -- 2.26.2
[Cluster-devel] [GFS2 PATCH 2/6] gfs2: print mapping->nrpages in glock dump for address space glocks
This patch makes the glock dumps in debugfs print the number of pages (nrpages) for address space glocks. This will aid in debugging. Signed-off-by: Bob Peterson --- fs/gfs2/glock.c | 25 - 1 file changed, 16 insertions(+), 9 deletions(-) diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index bf70e3b14938..9a5dadc93cfc 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -1978,7 +1978,13 @@ void gfs2_dump_glock(struct seq_file *seq, struct gfs2_glock *gl, bool fsid) char gflags_buf[32]; struct gfs2_sbd *sdp = gl->gl_name.ln_sbd; char fs_id_buf[sizeof(sdp->sd_fsname) + 7]; + unsigned long nrpages = 0; + if (gl->gl_ops->go_flags & GLOF_ASPACE) { + struct address_space *mapping = gfs2_glock2aspace(gl); + + nrpages = mapping->nrpages; + } memset(fs_id_buf, 0, sizeof(fs_id_buf)); if (fsid && sdp) /* safety precaution */ sprintf(fs_id_buf, "fsid=%s: ", sdp->sd_fsname); @@ -1987,15 +1993,16 @@ void gfs2_dump_glock(struct seq_file *seq, struct gfs2_glock *gl, bool fsid) if (!test_bit(GLF_DEMOTE, &gl->gl_flags)) dtime = 0; gfs2_print_dbg(seq, "%sG: s:%s n:%u/%llx f:%s t:%s d:%s/%llu a:%d " - "v:%d r:%d m:%ld\n", fs_id_buf, state2str(gl->gl_state), - gl->gl_name.ln_type, - (unsigned long long)gl->gl_name.ln_number, - gflags2str(gflags_buf, gl), - state2str(gl->gl_target), - state2str(gl->gl_demote_state), dtime, - atomic_read(&gl->gl_ail_count), - atomic_read(&gl->gl_revokes), - (int)gl->gl_lockref.count, gl->gl_hold_time); + "v:%d r:%d m:%ld p:%lu\n", + fs_id_buf, state2str(gl->gl_state), + gl->gl_name.ln_type, + (unsigned long long)gl->gl_name.ln_number, + gflags2str(gflags_buf, gl), + state2str(gl->gl_target), + state2str(gl->gl_demote_state), dtime, + atomic_read(&gl->gl_ail_count), + atomic_read(&gl->gl_revokes), + (int)gl->gl_lockref.count, gl->gl_hold_time, nrpages); list_for_each_entry(gh, &gl->gl_holders, gh_list) dump_holder(seq, gh, fs_id_buf); -- 2.26.2
Re: [Cluster-devel] [PATCH 6/8] gfs2: instrumentation wrt log_flush stuck
Hi Bob, On Tue, May 26, 2020 at 3:05 PM Bob Peterson wrote: > This adds checks for gfs2_log_flush being stuck, similarly to the check > in gfs2_ail1_flush. > > Signed-off-by: Bob Peterson > --- > fs/gfs2/log.c | 14 +++--- > 1 file changed, 11 insertions(+), 3 deletions(-) > > diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c > index 1d51b4781bdd..636c82dda68b 100644 > --- a/fs/gfs2/log.c > +++ b/fs/gfs2/log.c > @@ -145,9 +145,6 @@ static void dump_ail_list(struct gfs2_sbd *sdp) > struct gfs2_bufdata *bd; > struct buffer_head *bh; > > - fs_err(sdp, "Error: In gfs2_ail1_flush for ten minutes! t=%d\n", > - current->journal_info ? 1 : 0); > - > list_for_each_entry_reverse(tr, &sdp->sd_ail1_list, tr_list) { > list_for_each_entry_reverse(bd, &tr->tr_ail1_list, > bd_ail_st_list) { > @@ -197,6 +194,8 @@ void gfs2_ail1_flush(struct gfs2_sbd *sdp, struct > writeback_control *wbc) > restart: > ret = 0; > if (time_after(jiffies, flush_start + (HZ * 600))) { > + fs_err(sdp, "Error: In gfs2_ail1_flush for ten minutes! " > + "t=%d\n", current->journal_info ? 1 : 0); > dump_ail_list(sdp); > goto out; > } > @@ -970,7 +969,16 @@ void gfs2_log_flush(struct gfs2_sbd *sdp, struct > gfs2_glock *gl, u32 flags) > > if (!(flags & GFS2_LOG_HEAD_FLUSH_NORMAL)) { > if (!sdp->sd_log_idle) { > + unsigned long start = jiffies; > + > for (;;) { > + if (time_after(jiffies, start + (HZ * 600))) { This should probably have some rate limiting as well, for example: start = jiffies; I'm not sure what provides similar rate limiting in gfs2_ail1_flush. > + fs_err(sdp, "Error: In gfs2_log_flush > " > + "for 10 minutes! t=%d\n", > + current->journal_info ? 1 : 0); Please don't break the format string up like that; this makes grepping harder. > + dump_ail_list(sdp); > + break; > + } > gfs2_ail1_start(sdp); > gfs2_ail1_wait(sdp); > if (gfs2_ail1_empty(sdp, 0)) > -- > 2.26.2 >
Re: [Cluster-devel] [PATCH 0/8] Misc Patch Collection
Hi Bob, On Tue, May 26, 2020 at 3:07 PM Bob Peterson wrote: > Andreas expressed some concerns about some of the others. For example, he > didn't like that the new "status" sysfs file was taking "try" locks, but > if the lock is held, I don't know of a better way to do this. walking a linked list that's protected by a spin lock without holding that spin lock is so fundamentally broken that I don't see why we even need to discuss it. There's nothing that guarantees that we'll be able to walk the list. The spin_is_locked checks this patch adds for reporting the states of spin locks are practically useless as well. > He also expressed > a concern that the new file should be in debugfs rather than sysfs. > I'm open to opinions. Regardless of where it is, the new debug file is a > perfect candidate to include in sos reports. It may be better to include more relevant information in trace points. That way, we automatically get that information correlated with other trace events to see what was happening when. Thanks, Andreas
[Cluster-devel] [GFS2 PATCH v3] gfs2: new slab for transactions
Hi, I revised the patch description for Andreas: This patch adds a new slab for gfs2 transactions. That allows us to reduce kernel memory fragmentation, have better organization of data for analysis of vmcore dumps. A new centralized function is added to free the slab objects, and it exposes use-after-free by giving warnings if a transaction is freed while it still has bd elements attached to its buffers or ail lists. We make sure to initialize those transaction ail lists so we can check their integrity when freeing. At a later time, we should add a slab initialization function to make it more efficient, but for this initial patch I wanted to minimize the impact. Signed-off-by: Bob Peterson --- fs/gfs2/glops.c | 2 ++ fs/gfs2/log.c | 9 + fs/gfs2/main.c | 9 + fs/gfs2/trans.c | 22 ++ fs/gfs2/trans.h | 1 + fs/gfs2/util.c | 1 + fs/gfs2/util.h | 1 + 7 files changed, 37 insertions(+), 8 deletions(-) diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c index 4862dae868a2..224fb3bd503c 100644 --- a/fs/gfs2/glops.c +++ b/fs/gfs2/glops.c @@ -91,6 +91,8 @@ static int gfs2_ail_empty_gl(struct gfs2_glock *gl) memset(&tr, 0, sizeof(tr)); INIT_LIST_HEAD(&tr.tr_buf); INIT_LIST_HEAD(&tr.tr_databuf); + INIT_LIST_HEAD(&tr.tr_ail1_list); + INIT_LIST_HEAD(&tr.tr_ail2_list); tr.tr_revokes = atomic_read(&gl->gl_ail_count); if (!tr.tr_revokes) { diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c index fcc7f58d74f0..64efa3f743af 100644 --- a/fs/gfs2/log.c +++ b/fs/gfs2/log.c @@ -30,6 +30,7 @@ #include "util.h" #include "dir.h" #include "trace_gfs2.h" +#include "trans.h" static void gfs2_log_shutdown(struct gfs2_sbd *sdp); @@ -378,7 +379,7 @@ static void ail2_empty(struct gfs2_sbd *sdp, unsigned int new_tail) list_del(&tr->tr_list); gfs2_assert_warn(sdp, list_empty(&tr->tr_ail1_list)); gfs2_assert_warn(sdp, list_empty(&tr->tr_ail2_list)); - kfree(tr); + gfs2_trans_free(sdp, tr); } spin_unlock(&sdp->sd_ail_lock); @@ -863,14 +864,14 @@ static void ail_drain(struct gfs2_sbd *sdp) gfs2_ail_empty_tr(sdp, tr, &tr->tr_ail1_list); gfs2_ail_empty_tr(sdp, tr, &tr->tr_ail2_list); list_del(&tr->tr_list); - kfree(tr); + gfs2_trans_free(sdp, tr); } while (!list_empty(&sdp->sd_ail2_list)) { tr = list_first_entry(&sdp->sd_ail2_list, struct gfs2_trans, tr_list); gfs2_ail_empty_tr(sdp, tr, &tr->tr_ail2_list); list_del(&tr->tr_list); - kfree(tr); + gfs2_trans_free(sdp, tr); } spin_unlock(&sdp->sd_ail_lock); } @@ -1010,7 +1011,7 @@ void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl, u32 flags) trace_gfs2_log_flush(sdp, 0, flags); up_write(&sdp->sd_log_flush_lock); - kfree(tr); + gfs2_trans_free(sdp, tr); } /** diff --git a/fs/gfs2/main.c b/fs/gfs2/main.c index a1a295b739fb..733470ca6be9 100644 --- a/fs/gfs2/main.c +++ b/fs/gfs2/main.c @@ -143,6 +143,12 @@ static int __init init_gfs2_fs(void) if (!gfs2_qadata_cachep) goto fail_cachep7; + gfs2_trans_cachep = kmem_cache_create("gfs2_trans", + sizeof(struct gfs2_trans), + 0, 0, NULL); + if (!gfs2_trans_cachep) + goto fail_cachep8; + error = register_shrinker(&gfs2_qd_shrinker); if (error) goto fail_shrinker; @@ -194,6 +200,8 @@ static int __init init_gfs2_fs(void) fail_fs1: unregister_shrinker(&gfs2_qd_shrinker); fail_shrinker: + kmem_cache_destroy(gfs2_trans_cachep); +fail_cachep8: kmem_cache_destroy(gfs2_qadata_cachep); fail_cachep7: kmem_cache_destroy(gfs2_quotad_cachep); @@ -236,6 +244,7 @@ static void __exit exit_gfs2_fs(void) rcu_barrier(); mempool_destroy(gfs2_page_pool); + kmem_cache_destroy(gfs2_trans_cachep); kmem_cache_destroy(gfs2_qadata_cachep); kmem_cache_destroy(gfs2_quotad_cachep); kmem_cache_destroy(gfs2_rgrpd_cachep); diff --git a/fs/gfs2/trans.c b/fs/gfs2/trans.c index ffe840505082..aa7cd0abb369 100644 --- a/fs/gfs2/trans.c +++ b/fs/gfs2/trans.c @@ -37,7 +37,7 @@ int gfs2_trans_begin(struct gfs2_sbd *sdp, unsigned int blocks, if (!test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags)) return -EROFS; - tr = kzalloc(sizeof(struct gfs2_trans), GFP_NOFS); + tr = kmem_cache_zalloc(gfs2_trans_cachep, GFP_NOFS); if (!tr) return -ENOMEM; @@ -53,6 +53,9 @@ int gfs2_trans_begin(struct gfs2_sbd *sdp, unsigned int blocks, INIT_LIST_HEAD(&tr->tr_databuf); INIT_LIST_HEAD(&tr->tr_buf); + INIT_LIST_HEAD(&tr->t
[Cluster-devel] [GFS2 PATCH 5/6] gfs2: new slab for transactions
This patch adds a new slab for gfs2 transactions. That allows us to have an initialization function and protect against some errors. Signed-off-by: Bob Peterson --- fs/gfs2/glops.c | 2 ++ fs/gfs2/log.c | 9 + fs/gfs2/main.c | 9 + fs/gfs2/trans.c | 22 ++ fs/gfs2/trans.h | 1 + fs/gfs2/util.c | 1 + fs/gfs2/util.h | 1 + 7 files changed, 37 insertions(+), 8 deletions(-) diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c index 4862dae868a2..224fb3bd503c 100644 --- a/fs/gfs2/glops.c +++ b/fs/gfs2/glops.c @@ -91,6 +91,8 @@ static int gfs2_ail_empty_gl(struct gfs2_glock *gl) memset(&tr, 0, sizeof(tr)); INIT_LIST_HEAD(&tr.tr_buf); INIT_LIST_HEAD(&tr.tr_databuf); + INIT_LIST_HEAD(&tr.tr_ail1_list); + INIT_LIST_HEAD(&tr.tr_ail2_list); tr.tr_revokes = atomic_read(&gl->gl_ail_count); if (!tr.tr_revokes) { diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c index fcc7f58d74f0..64efa3f743af 100644 --- a/fs/gfs2/log.c +++ b/fs/gfs2/log.c @@ -30,6 +30,7 @@ #include "util.h" #include "dir.h" #include "trace_gfs2.h" +#include "trans.h" static void gfs2_log_shutdown(struct gfs2_sbd *sdp); @@ -378,7 +379,7 @@ static void ail2_empty(struct gfs2_sbd *sdp, unsigned int new_tail) list_del(&tr->tr_list); gfs2_assert_warn(sdp, list_empty(&tr->tr_ail1_list)); gfs2_assert_warn(sdp, list_empty(&tr->tr_ail2_list)); - kfree(tr); + gfs2_trans_free(sdp, tr); } spin_unlock(&sdp->sd_ail_lock); @@ -863,14 +864,14 @@ static void ail_drain(struct gfs2_sbd *sdp) gfs2_ail_empty_tr(sdp, tr, &tr->tr_ail1_list); gfs2_ail_empty_tr(sdp, tr, &tr->tr_ail2_list); list_del(&tr->tr_list); - kfree(tr); + gfs2_trans_free(sdp, tr); } while (!list_empty(&sdp->sd_ail2_list)) { tr = list_first_entry(&sdp->sd_ail2_list, struct gfs2_trans, tr_list); gfs2_ail_empty_tr(sdp, tr, &tr->tr_ail2_list); list_del(&tr->tr_list); - kfree(tr); + gfs2_trans_free(sdp, tr); } spin_unlock(&sdp->sd_ail_lock); } @@ -1010,7 +1011,7 @@ void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl, u32 flags) trace_gfs2_log_flush(sdp, 0, flags); up_write(&sdp->sd_log_flush_lock); - kfree(tr); + gfs2_trans_free(sdp, tr); } /** diff --git a/fs/gfs2/main.c b/fs/gfs2/main.c index a1a295b739fb..733470ca6be9 100644 --- a/fs/gfs2/main.c +++ b/fs/gfs2/main.c @@ -143,6 +143,12 @@ static int __init init_gfs2_fs(void) if (!gfs2_qadata_cachep) goto fail_cachep7; + gfs2_trans_cachep = kmem_cache_create("gfs2_trans", + sizeof(struct gfs2_trans), + 0, 0, NULL); + if (!gfs2_trans_cachep) + goto fail_cachep8; + error = register_shrinker(&gfs2_qd_shrinker); if (error) goto fail_shrinker; @@ -194,6 +200,8 @@ static int __init init_gfs2_fs(void) fail_fs1: unregister_shrinker(&gfs2_qd_shrinker); fail_shrinker: + kmem_cache_destroy(gfs2_trans_cachep); +fail_cachep8: kmem_cache_destroy(gfs2_qadata_cachep); fail_cachep7: kmem_cache_destroy(gfs2_quotad_cachep); @@ -236,6 +244,7 @@ static void __exit exit_gfs2_fs(void) rcu_barrier(); mempool_destroy(gfs2_page_pool); + kmem_cache_destroy(gfs2_trans_cachep); kmem_cache_destroy(gfs2_qadata_cachep); kmem_cache_destroy(gfs2_quotad_cachep); kmem_cache_destroy(gfs2_rgrpd_cachep); diff --git a/fs/gfs2/trans.c b/fs/gfs2/trans.c index ffe840505082..aa7cd0abb369 100644 --- a/fs/gfs2/trans.c +++ b/fs/gfs2/trans.c @@ -37,7 +37,7 @@ int gfs2_trans_begin(struct gfs2_sbd *sdp, unsigned int blocks, if (!test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags)) return -EROFS; - tr = kzalloc(sizeof(struct gfs2_trans), GFP_NOFS); + tr = kmem_cache_zalloc(gfs2_trans_cachep, GFP_NOFS); if (!tr) return -ENOMEM; @@ -53,6 +53,9 @@ int gfs2_trans_begin(struct gfs2_sbd *sdp, unsigned int blocks, INIT_LIST_HEAD(&tr->tr_databuf); INIT_LIST_HEAD(&tr->tr_buf); + INIT_LIST_HEAD(&tr->tr_ail1_list); + INIT_LIST_HEAD(&tr->tr_ail2_list); + sb_start_intwrite(sdp->sd_vfs); error = gfs2_log_reserve(sdp, tr->tr_reserved); @@ -65,7 +68,7 @@ int gfs2_trans_begin(struct gfs2_sbd *sdp, unsigned int blocks, fail: sb_end_intwrite(sdp->sd_vfs); - kfree(tr); + kmem_cache_free(gfs2_trans_cachep, tr); return error; } @@ -93,7 +96,7 @@ void gfs2_trans_end(struct gfs2_sbd *sdp) if (!test_bit(TR_TOUCHED, &tr->tr_flags)) { gfs2_log_release(sdp, tr->tr_rese
Re: [Cluster-devel] [PATCH 6/8] gfs2: instrumentation wrt log_flush stuck
- Original Message - > On Fri, Jun 5, 2020 at 4:49 PM Bob Peterson wrote: > > Hi Andreas, > > > > - Original Message - > > (snip) > > > > @@ -970,7 +969,16 @@ void gfs2_log_flush(struct gfs2_sbd *sdp, struct > > > > gfs2_glock *gl, u32 flags) > > > > > > > > if (!(flags & GFS2_LOG_HEAD_FLUSH_NORMAL)) { > > > > if (!sdp->sd_log_idle) { > > > > + unsigned long start = jiffies; > > > > + > > > > for (;;) { > > > > + if (time_after(jiffies, start + (HZ * > > > > 600))) { > > > > > > This should probably have some rate limiting as well, for example: > > > > Seems unnecessary. If the log flush gets stuck, the message will be printed > > once, and at most every 10 minutes. > > No, after ten minutes, the message will actually be printed for each > iteration of the loop. That's exactly why I was suggesting the rate > limiting. No, after ten minutes it dumps the ail list so you can see the problem and exits the loop with "break;". The next time it enters the loop, it starts with a new value of start which doesn't expire for another ten minutes. Bob
Re: [Cluster-devel] [PATCH 6/8] gfs2: instrumentation wrt log_flush stuck
On Fri, Jun 5, 2020 at 6:15 PM Bob Peterson wrote: > - Original Message - > > On Fri, Jun 5, 2020 at 4:49 PM Bob Peterson wrote: > > > Hi Andreas, > > > > > > - Original Message - > > > (snip) > > > > > @@ -970,7 +969,16 @@ void gfs2_log_flush(struct gfs2_sbd *sdp, struct > > > > > gfs2_glock *gl, u32 flags) > > > > > > > > > > if (!(flags & GFS2_LOG_HEAD_FLUSH_NORMAL)) { > > > > > if (!sdp->sd_log_idle) { > > > > > + unsigned long start = jiffies; > > > > > + > > > > > for (;;) { > > > > > + if (time_after(jiffies, start + (HZ * > > > > > 600))) { > > > > > > > > This should probably have some rate limiting as well, for example: > > > > > > Seems unnecessary. If the log flush gets stuck, the message will be > > > printed > > > once, and at most every 10 minutes. > > > > No, after ten minutes, the message will actually be printed for each > > iteration of the loop. That's exactly why I was suggesting the rate > > limiting. > > No, after ten minutes it dumps the ail list so you can see the problem > and exits the loop with "break;". > > The next time it enters the loop, it starts with a new value of start > which doesn't expire for another ten minutes. Ok, I misread. Thanks, Andreas
[Cluster-devel] [GFS2 PATCH 0/6 v2] Misc Patch Collection
I've revised my recent patch set based on feedback from Andreas. Here is my latest. Changes from last time are: 1. Removed the spin_trylock related info from the status sysfs file as per Andreas's feedback. Although valuable to have, I see his point. 2. Removed "gfs2: Add new trace point for glock ail management". I still think this would be useful to have, but perhaps only compiled in with a new debug kernel option. For now, leave it out. 3. Removed "gfs2: add memory barriers to gfs2_glock_remove_revoke". It is not needed. 4. Removed "gfs2: Only do glock put in gfs2_create_inode for free inodes". It was pushed to the repo separately. 5. Revised "gfs2: instrumentation wrt log_flush stuck" moving that code to a new function, which makes the err string contiguous for grep. 6. Added "gfs2: new slab for transactions" which is revised to fix bugs that were in the old version. 7. Added "gfs2: fix use-after-free on transaction ail lists" that fixes a problem that comes to light with the previous patch. 8. Patches 1/8 and 2/8 are omitted, since they were pushed separately. Bob Peterson (6): gfs2: Add new sysfs file for gfs2 status gfs2: print mapping->nrpages in glock dump for address space glocks gfs2: instrumentation wrt log_flush stuck gfs2: introduce new gfs2_glock_assert_withdraw gfs2: new slab for transactions gfs2: fix use-after-free on transaction ail lists fs/gfs2/glock.c | 32 +++- fs/gfs2/glock.h | 9 +++ fs/gfs2/glops.c | 2 ++ fs/gfs2/log.c | 54 fs/gfs2/main.c | 9 +++ fs/gfs2/sys.c | 65 + fs/gfs2/trans.c | 22 ++--- fs/gfs2/trans.h | 1 + fs/gfs2/util.c | 1 + fs/gfs2/util.h | 1 + 10 files changed, 165 insertions(+), 31 deletions(-) -- 2.26.2
Re: [Cluster-devel] [PATCH 7/8] gfs2: Add new trace point for glock ail management
Bob, this very much looks like a debugging trace point that has no business in production code. Thanks, Andreas
Re: [Cluster-devel] [PATCH 6/8] gfs2: instrumentation wrt log_flush stuck
On Fri, Jun 5, 2020 at 4:49 PM Bob Peterson wrote: > Hi Andreas, > > - Original Message - > (snip) > > > @@ -970,7 +969,16 @@ void gfs2_log_flush(struct gfs2_sbd *sdp, struct > > > gfs2_glock *gl, u32 flags) > > > > > > if (!(flags & GFS2_LOG_HEAD_FLUSH_NORMAL)) { > > > if (!sdp->sd_log_idle) { > > > + unsigned long start = jiffies; > > > + > > > for (;;) { > > > + if (time_after(jiffies, start + (HZ * > > > 600))) { > > > > This should probably have some rate limiting as well, for example: > > Seems unnecessary. If the log flush gets stuck, the message will be printed > once, and at most every 10 minutes. No, after ten minutes, the message will actually be printed for each iteration of the loop. That's exactly why I was suggesting the rate limiting. > > start = jiffies; > > > > I'm not sure what provides similar rate limiting in gfs2_ail1_flush. > > > > > + fs_err(sdp, "Error: In > > > gfs2_log_flush " > > > + "for 10 minutes! t=%d\n", > > > + current->journal_info ? 1 : > > > 0); > > > > Please don't break the format string up like that; this makes grepping > > harder. > > How do you propose I break present it without going over 80 character? > I could #define it as a constant, or put it into a separate function > that has less indentation, I suppose. There *is* no hard 80 character limit. The checkpatch warnings shouldn't push us into making our code worse. Also note that since commit bdc48fa11e, checkpatch will only warn about lines longer than 100 characters. Thanks, Andreas
Re: [Cluster-devel] [PATCH 6/8] gfs2: instrumentation wrt log_flush stuck
Hi Andreas, - Original Message - (snip) > > @@ -970,7 +969,16 @@ void gfs2_log_flush(struct gfs2_sbd *sdp, struct > > gfs2_glock *gl, u32 flags) > > > > if (!(flags & GFS2_LOG_HEAD_FLUSH_NORMAL)) { > > if (!sdp->sd_log_idle) { > > + unsigned long start = jiffies; > > + > > for (;;) { > > + if (time_after(jiffies, start + (HZ * > > 600))) { > > This should probably have some rate limiting as well, for example: Seems unnecessary. If the log flush gets stuck, the message will be printed once, and at most every 10 minutes. > start = jiffies; > > I'm not sure what provides similar rate limiting in gfs2_ail1_flush. > > > + fs_err(sdp, "Error: In > > gfs2_log_flush " > > + "for 10 minutes! t=%d\n", > > + current->journal_info ? 1 : > > 0); > > Please don't break the format string up like that; this makes grepping > harder. How do you propose I break present it without going over 80 character? I could #define it as a constant, or put it into a separate function that has less indentation, I suppose. Bob
[Cluster-devel] [GFS2 PATCH 6/6] gfs2: fix use-after-free on transaction ail lists
Before this patch, transactions could be merged into the system transaction by function gfs2_merge_trans(), but then it is not considered attached. The caller, log_refund, only sets TR_ATTACHED if the transaction is not merged. Later, in function, gfs2_trans_end, if TR_ATTACHED is not set (because it was merged) the transaction is freed by gfs2_trans_free(). However, by the time this happens, the log flush mechanism may have queued bd elements to the ail list, which means tr_ail1_list may not be empty, and the flush may forget about the bd elements altogether (memory leak, because it focuses on the sdp transaction instead) or worse, reference them after the transaction has been freed. Although I've not seen any serious consequences, the problem becomes apparent with the previous patch's addition of: gfs2_assert_warn(sdp, list_empty(&tr->tr_ail1_list)); to function gfs2_trans_free(). This patch adds logic into gfs2_merge_trans() to move the merged transaction's ail lists to the sdp transaction. This prevents the use-after-free. To facilitate this, we pass sdp into the function instead of the transaction itself. Signed-off-by: Bob Peterson --- fs/gfs2/log.c | 11 +-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c index 64efa3f743af..5a2bc70b0859 100644 --- a/fs/gfs2/log.c +++ b/fs/gfs2/log.c @@ -1020,8 +1020,10 @@ void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl, u32 flags) * @new: New transaction to be merged */ -static void gfs2_merge_trans(struct gfs2_trans *old, struct gfs2_trans *new) +static void gfs2_merge_trans(struct gfs2_sbd *sdp, struct gfs2_trans *new) { + struct gfs2_trans *old = sdp->sd_log_tr; + WARN_ON_ONCE(!test_bit(TR_ATTACHED, &old->tr_flags)); old->tr_num_buf_new += new->tr_num_buf_new; @@ -1033,6 +1035,11 @@ static void gfs2_merge_trans(struct gfs2_trans *old, struct gfs2_trans *new) list_splice_tail_init(&new->tr_databuf, &old->tr_databuf); list_splice_tail_init(&new->tr_buf, &old->tr_buf); + + spin_lock(&sdp->sd_ail_lock); + list_splice_tail_init(&new->tr_ail1_list, &old->tr_ail1_list); + list_splice_tail_init(&new->tr_ail2_list, &old->tr_ail2_list); + spin_unlock(&sdp->sd_ail_lock); } static void log_refund(struct gfs2_sbd *sdp, struct gfs2_trans *tr) @@ -1044,7 +1051,7 @@ static void log_refund(struct gfs2_sbd *sdp, struct gfs2_trans *tr) gfs2_log_lock(sdp); if (sdp->sd_log_tr) { - gfs2_merge_trans(sdp->sd_log_tr, tr); + gfs2_merge_trans(sdp, tr); } else if (tr->tr_num_buf_new || tr->tr_num_databuf_new) { gfs2_assert_withdraw(sdp, test_bit(TR_ALLOCED, &tr->tr_flags)); sdp->sd_log_tr = tr; -- 2.26.2
[Cluster-devel] [GFS2 PATCH 1/6] gfs2: Add new sysfs file for gfs2 status
This patch adds a new file: /sys/fs/gfs2/*/status which will report the status of the file system. Catting this file dumps the current status of the file system according to various superblock variables. For example: Journal Checked: 1 Journal Live: 1 Withdrawn: 0 No barriers: 0 No recovery: 0 Demote:0 No Journal ID: 1 RO Recovery: 0 Skip DLM Unlock: 0 Force AIL Flush: 0 FS Frozen: 0 Withdrawing: 0 Withdraw In Prog: 0 Remote Withdraw: 0 Withdraw Recovery: 0 sd_log_lock held: 0 statfs_spin held: 0 sd_rindex_spin:0 sd_jindex_spin:0 sd_trunc_lock: 0 sd_bitmap_lock:0 sd_ordered_lock: 0 sd_ail_lock: 0 sd_log_error: 0 sd_log_flush_lock: 0 sd_log_flush_lock: 1 sd_log_in_flight: 1 Signed-off-by: Bob Peterson --- fs/gfs2/sys.c | 65 +++ 1 file changed, 65 insertions(+) diff --git a/fs/gfs2/sys.c b/fs/gfs2/sys.c index d28c41bd69b0..572353f0b41f 100644 --- a/fs/gfs2/sys.c +++ b/fs/gfs2/sys.c @@ -63,6 +63,69 @@ static ssize_t id_show(struct gfs2_sbd *sdp, char *buf) MAJOR(sdp->sd_vfs->s_dev), MINOR(sdp->sd_vfs->s_dev)); } +static ssize_t status_show(struct gfs2_sbd *sdp, char *buf) +{ + unsigned long f = sdp->sd_flags; + ssize_t s; + + s = snprintf(buf, PAGE_SIZE, +"Journal Checked: %d\n" +"Journal Live: %d\n" +"Withdrawn: %d\n" +"No barriers: %d\n" +"No recovery: %d\n" +"Demote:%d\n" +"No Journal ID: %d\n" +"RO Recovery: %d\n" +"Skip DLM Unlock: %d\n" +"Force AIL Flush: %d\n" +"FS Frozen: %d\n" +"Withdrawing: %d\n" +"Withdraw In Prog: %d\n" +"Remote Withdraw: %d\n" +"Withdraw Recovery: %d\n" +"sd_log_lock held: %d\n" +"statfs_spin held: %d\n" +"sd_rindex_spin:%d\n" +"sd_jindex_spin:%d\n" +"sd_trunc_lock: %d\n" +"sd_bitmap_lock:%d\n" +"sd_ordered_lock: %d\n" +"sd_ail_lock: %d\n" +"sd_log_error: %d\n" +"sd_log_flush_lock: %d\n" +"sd_log_num_revoke: %u\n" +"sd_log_in_flight: %d\n", +test_bit(SDF_JOURNAL_CHECKED, &f), +test_bit(SDF_JOURNAL_LIVE, &f), +test_bit(SDF_WITHDRAWN, &f), +test_bit(SDF_NOBARRIERS, &f), +test_bit(SDF_NORECOVERY, &f), +test_bit(SDF_DEMOTE, &f), +test_bit(SDF_NOJOURNALID, &f), +test_bit(SDF_RORECOVERY, &f), +test_bit(SDF_SKIP_DLM_UNLOCK, &f), +test_bit(SDF_FORCE_AIL_FLUSH, &f), +test_bit(SDF_FS_FROZEN, &f), +test_bit(SDF_WITHDRAWING, &f), +test_bit(SDF_WITHDRAW_IN_PROG, &f), +test_bit(SDF_REMOTE_WITHDRAW, &f), +test_bit(SDF_WITHDRAW_RECOVERY, &f), +spin_is_locked(&sdp->sd_log_lock), +spin_is_locked(&sdp->sd_statfs_spin), +spin_is_locked(&sdp->sd_rindex_spin), +spin_is_locked(&sdp->sd_jindex_spin), +spin_is_locked(&sdp->sd_trunc_lock), +spin_is_locked(&sdp->sd_bitmap_lock), +spin_is_locked(&sdp->sd_ordered_lock), +spin_is_locked(&sdp->sd_ail_lock), +sdp->sd_log_error, +rwsem_is_locked(&sdp->sd_log_flush_lock), +sdp->sd_log_num_revoke, +atomic_read(&sdp->sd_log_in_flight)); + return s; +} + static ssize_t fsname_show(struct gfs2_sbd *sdp, char *buf) { return snprintf(buf, PAGE_SIZE, "%s\n", sdp->sd_fsname); @@ -283,6 +346,7 @@ GFS2_ATTR(quota_sync, 0200, NULL, quota_sync_store); GFS2_ATTR(quota_refresh_user, 0200, NULL, quota_refresh_user_store); GFS2_ATTR(quota_refresh_group, 0200, NULL, quota_refresh_group_store); GFS2_ATTR(demote_rq, 0200, NULL, demote_rq_store); +GFS2_ATTR(status, 0444, status_show, NULL); static struct attribute *gfs2_attrs[] = { &gfs2_attr_id.attr, @@ -295,6 +359,7 @@ static struct attribute *gfs2_attrs[] = { &gfs2_attr_quota_refresh_user.attr, &gfs2_attr_quota_refresh_group.attr, &gfs2_attr_demote_rq.attr, + &gfs2_attr_status.attr, NULL, }; ATTRIBUTE_GROUPS(gfs2); -- 2.26.2
[Cluster-devel] [GFS2 PATCH 4/6] gfs2: introduce new gfs2_glock_assert_withdraw
Before this patch, asserts based on glocks did not print the glock with the error. This patch introduces a new macro, gfs2_glock_assert_withdraw which first prints the glock, then takes the assert. This also changes a few glock asserts to the new macro. Signed-off-by: Bob Peterson --- fs/gfs2/glock.c | 7 --- fs/gfs2/glock.h | 9 + 2 files changed, 13 insertions(+), 3 deletions(-) diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index 9a5dadc93cfc..64541d8bf9ad 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -164,7 +164,7 @@ void gfs2_glock_free(struct gfs2_glock *gl) { struct gfs2_sbd *sdp = gl->gl_name.ln_sbd; - BUG_ON(atomic_read(&gl->gl_revokes)); + gfs2_glock_assert_withdraw(gl, atomic_read(&gl->gl_revokes) == 0); rhashtable_remove_fast(&gl_hash_table, &gl->gl_node, ht_parms); smp_mb(); wake_up_glock(gl); @@ -626,7 +626,8 @@ __acquires(&gl->gl_lockref.lock) */ if ((atomic_read(&gl->gl_ail_count) != 0) && (!cmpxchg(&sdp->sd_log_error, 0, -EIO))) { - gfs2_assert_warn(sdp, !atomic_read(&gl->gl_ail_count)); + gfs2_glock_assert_warn(gl, + !atomic_read(&gl->gl_ail_count)); gfs2_dump_glock(NULL, gl, true); } glops->go_inval(gl, target == LM_ST_DEFERRED ? 0 : DIO_METADATA); @@ -1836,7 +1837,7 @@ void gfs2_glock_finish_truncate(struct gfs2_inode *ip) int ret; ret = gfs2_truncatei_resume(ip); - gfs2_assert_withdraw(gl->gl_name.ln_sbd, ret == 0); + gfs2_glock_assert_withdraw(gl, ret == 0); spin_lock(&gl->gl_lockref.lock); clear_bit(GLF_LOCK, &gl->gl_flags); diff --git a/fs/gfs2/glock.h b/fs/gfs2/glock.h index b8adaf80e4c5..1c547dff7c57 100644 --- a/fs/gfs2/glock.h +++ b/fs/gfs2/glock.h @@ -205,6 +205,15 @@ extern void gfs2_dump_glock(struct seq_file *seq, struct gfs2_glock *gl, #define GLOCK_BUG_ON(gl,x) do { if (unlikely(x)) { \ gfs2_dump_glock(NULL, gl, true);\ BUG(); } } while(0) +#define gfs2_glock_assert_warn(gl, x) do { if (unlikely(!(x))) { \ + gfs2_dump_glock(NULL, gl, true);\ + gfs2_assert_warn((gl)->gl_name.ln_sbd, (x)); } } \ + while (0) +#define gfs2_glock_assert_withdraw(gl, x) do { if (unlikely(!(x))) { \ + gfs2_dump_glock(NULL, gl, true);\ + gfs2_assert_withdraw((gl)->gl_name.ln_sbd, (x)); } } \ + while (0) + extern __printf(2, 3) void gfs2_print_dbg(struct seq_file *seq, const char *fmt, ...); -- 2.26.2