Hi,

On 31/01/2019 10:55, Ross Lagerwall wrote:
Under certain conditions, lru_count may drop below zero resulting in
a large amount of log spam like this:

vmscan: shrink_slab: gfs2_dump_glock+0x3b0/0x630 [gfs2] \
     negative objects to delete nr=-1

This happens as follows:
1) A glock is moved from lru_list to the dispose list and lru_count is
    decremented.
2) The dispose function calls cond_resched() and drops the lru lock.
3) Another thread takes the lru lock and tries to add the same glock to
    lru_list, checking if the glock is on an lru list.
4) It is on a list (actually the dispose list) and so it avoids
    incrementing lru_count.
5) The glock is moved to lru_list.
5) The original thread doesn't dispose it because it has been re-added
    to the lru list but the lru_count has still decreased by one.

Fix by checking if the LRU flag is set on the glock rather than checking
if the glock is on some list and rearrange the code so that the LRU flag
is added/removed precisely when the glock is added/removed from lru_list.

Signed-off-by: Ross Lagerwall <ross.lagerw...@citrix.com>

I'm glad we've got to the bottom of that one. Excellent work debugging that! Many thanks for the fix,

Steve.


---
  fs/gfs2/glock.c | 16 +++++++++-------
  1 file changed, 9 insertions(+), 7 deletions(-)

diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index b92740edc416..53e6c7e0c1b3 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -185,13 +185,14 @@ void gfs2_glock_add_to_lru(struct gfs2_glock *gl)
  {
        spin_lock(&lru_lock);
- if (!list_empty(&gl->gl_lru))
-               list_del_init(&gl->gl_lru);
-       else
+       list_del(&gl->gl_lru);
+       list_add_tail(&gl->gl_lru, &lru_list);
+
+       if (!test_bit(GLF_LRU, &gl->gl_flags)) {
+               set_bit(GLF_LRU, &gl->gl_flags);
                atomic_inc(&lru_count);
+       }
- list_add_tail(&gl->gl_lru, &lru_list);
-       set_bit(GLF_LRU, &gl->gl_flags);
        spin_unlock(&lru_lock);
  }
@@ -201,7 +202,7 @@ static void gfs2_glock_remove_from_lru(struct gfs2_glock *gl)
                return;
spin_lock(&lru_lock);
-       if (!list_empty(&gl->gl_lru)) {
+       if (test_bit(GLF_LRU, &gl->gl_flags)) {
                list_del_init(&gl->gl_lru);
                atomic_dec(&lru_count);
                clear_bit(GLF_LRU, &gl->gl_flags);
@@ -1456,6 +1457,7 @@ __acquires(&lru_lock)
                if (!spin_trylock(&gl->gl_lockref.lock)) {
  add_back_to_lru:
                        list_add(&gl->gl_lru, &lru_list);
+                       set_bit(GLF_LRU, &gl->gl_flags);
                        atomic_inc(&lru_count);
                        continue;
                }
@@ -1463,7 +1465,6 @@ __acquires(&lru_lock)
                        spin_unlock(&gl->gl_lockref.lock);
                        goto add_back_to_lru;
                }
-               clear_bit(GLF_LRU, &gl->gl_flags);
                gl->gl_lockref.count++;
                if (demote_ok(gl))
                        handle_callback(gl, LM_ST_UNLOCKED, 0, false);
@@ -1498,6 +1499,7 @@ static long gfs2_scan_glock_lru(int nr)
                if (!test_bit(GLF_LOCK, &gl->gl_flags)) {
                        list_move(&gl->gl_lru, &dispose);
                        atomic_dec(&lru_count);
+                       clear_bit(GLF_LRU, &gl->gl_flags);
                        freed++;
                        continue;
                }

Reply via email to