Before this patch, quota function bh_get called gfs2_iomap_get after it
had locked the sd_quota_mutex. That's a problem because gfs2_iomap_get
takes the i_rw_mutex, which is the reverse of the order used by other
code paths that take i_rw_mutex first, then the sd_quota_mutex:

punch_hole
   sweep_bh_for_rgrps
      down_write(&ip->i_rw_mutex)
      ...
      __gfs2_free_blocks(ip, rgd, bstart, (u32)blen, meta);
   gfs2_quota_change
      do_qc
         lock(&sdp->sd_quota_mutex);

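For contrast, the pre-patch ordering in bh_get was roughly:

bh_get
   lock(&sdp->sd_quota_mutex);
   gfs2_iomap_get
      down_read(&ip->i_rw_mutex)
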
This patch changes the order of events to this:

1. If qd->qd_bh_count is zero (we're likely, but not guaranteed, to be
   the first holder of the qd), call gfs2_iomap_get to determine which
   block needs to be read.
2. Lock the sd_quota_mutex.
3. If qd->qd_bh_count is still zero (now it's guaranteed), read in the
   necessary block.
4. If qd->qd_bh_count is no longer zero (because another holder bumped
   it while we weren't holding the mutex), the gfs2_iomap_get call was
   wasted work, so just return.

The case in which work is wasted (step 4) should be very rare and only
occur when sd_quota_mutex is contended; that is a small price for
avoiding the ABBA deadlock.
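
To make the new ordering concrete, here is a minimal userspace sketch
of the same pattern: do the lookup before taking the mutex, re-check
the count under the mutex, and retry if the lookup was skipped but
turns out to be needed. It is a toy model, not GFS2 code: toy_qd,
toy_lookup and toy_bh_get are made-up names, a pthread mutex stands in
for sd_quota_mutex, and -1 plays the role of IOMAP_NULL_ADDR.

#include <pthread.h>
#include <stdio.h>

struct toy_qd {
	int bh_count;	/* stands in for qd_bh_count */
	long block;	/* stands in for the buffer we would read */
};

static pthread_mutex_t quota_mutex = PTHREAD_MUTEX_INITIALIZER;

/* Stands in for gfs2_iomap_get(): a lookup that must not be done
 * while quota_mutex is held. */
static long toy_lookup(void)
{
	return 42;
}

static int toy_bh_get(struct toy_qd *qd)
{
	long block = -1;	/* -1 plays the role of IOMAP_NULL_ADDR */

retry:
	if (!qd->bh_count)		/* step 1: unlocked, so only a hint */
		block = toy_lookup();

	pthread_mutex_lock(&quota_mutex);	/* step 2 */

	if (qd->bh_count++) {			/* step 4: already read */
		pthread_mutex_unlock(&quota_mutex);
		return 0;
	}
	if (block == -1) {
		/* We skipped step 1 because the count was non-zero then,
		 * but it has since dropped to zero: start over. */
		qd->bh_count--;
		pthread_mutex_unlock(&quota_mutex);
		goto retry;
	}
	qd->block = block;			/* step 3: "read" the block */
	pthread_mutex_unlock(&quota_mutex);
	return 0;
}

int main(void)
{
	struct toy_qd qd = { 0, 0 };

	toy_bh_get(&qd);
	printf("block %ld, holders %d\n", qd.block, qd.bh_count);
	return 0;
}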

Signed-off-by: Bob Peterson <rpete...@redhat.com>
---
 fs/gfs2/quota.c | 39 +++++++++++++++++++++++++++------------
 1 file changed, 27 insertions(+), 12 deletions(-)

diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c
index 91bc3affe460..fe98b2a6c0b6 100644
--- a/fs/gfs2/quota.c
+++ b/fs/gfs2/quota.c
@@ -368,9 +368,23 @@ static int bh_get(struct gfs2_quota_data *qd)
        struct gfs2_inode *ip = GFS2_I(sdp->sd_qc_inode);
        unsigned int block, offset;
        struct buffer_head *bh;
-       struct iomap iomap = { };
+       struct iomap iomap = { .addr = IOMAP_NULL_ADDR, };
        int error;
 
+       block = qd->qd_slot / sdp->sd_qc_per_block;
+       offset = qd->qd_slot % sdp->sd_qc_per_block;
+
+retry:
+       if (!qd->qd_bh_count) {
+               error = gfs2_iomap_get(sdp->sd_qc_inode,
+                                (loff_t)block << sdp->sd_qc_inode->i_blkbits,
+                                      sdp->sd_sb.sb_bsize, &iomap);
+               if (error)
+                       return error;
+               if (iomap.addr == IOMAP_NULL_ADDR)
+                       return -ENOENT;
+       }
+
        mutex_lock(&sdp->sd_quota_mutex);
 
        if (qd->qd_bh_count++) {
@@ -378,17 +392,18 @@ static int bh_get(struct gfs2_quota_data *qd)
                return 0;
        }
 
-       block = qd->qd_slot / sdp->sd_qc_per_block;
-       offset = qd->qd_slot % sdp->sd_qc_per_block;
-
-       error = gfs2_iomap_get(sdp->sd_qc_inode,
-                              (loff_t)block << sdp->sd_qc_inode->i_blkbits,
-                              sdp->sd_sb.sb_bsize, &iomap);
-       if (error)
-               goto fail;
-       if (iomap.addr == IOMAP_NULL_ADDR) {
-               error = -ENOENT;
-               goto fail;
+       /*
+        * Make sure we actually called gfs2_iomap_get(). If qd_bh_count was
+        * non-zero at the start of the function, but has since dropped to
+        * zero (by someone else's decrement), we will have skipped the call
+        * to gfs2_iomap_get() above and therefore don't know which block we
+        * need to read. In that case we need to start over to get the block
+        * number.
+        */
+       if (unlikely(iomap.addr == IOMAP_NULL_ADDR)) {
+               qd->qd_bh_count--;
+               mutex_unlock(&sdp->sd_quota_mutex);
+               goto retry;
        }
 
        error = gfs2_meta_read(ip->i_gl, iomap.addr >> sdp->sd_qc_inode->i_blkbits,
-- 
2.34.1
