There are two problems in dlm_run_purgelist 1. If a lockres is found to be in use, dlm_run_purgelist keeps trying to purge the same lockres instead of trying the next lockres.
2. When a lockres is found unused, dlm_run_purgelist releases lockres spinlock before setting DLM_LOCK_RES_DROPPING_REF and calls dlm_purge_lockres. spinlock is reacquired but in this window lockres can get reused. This leads to BUG. This patch modifies dlm_run_purgelist to skip lockres if it's in use and purge next lockres. It also sets DLM_LOCK_RES_DROPPING_REF before releasing the lockres spinlock protecting it from getting reused. Signed-off-by: Srinivas Eeda <[email protected]> Signed-off-by: Sunil Mushran <[email protected]> --- dlmcommon.h | 2 - dlmthread.c | 72 +++++++++++++++++++++++++++++------------------------------- 2 files changed, 35 insertions(+), 39 deletions(-) diff -upr ocfs2-1.2.9-7/fs/ocfs2/dlm/dlmthread.c ocfs2-1.2.9-8/fs/ocfs2/dlm/dlmthread.c --- ocfs2-1.2.9-7/fs/ocfs2/dlm/dlmthread.c 2010-06-26 14:40:32.000000000 +0800 +++ ocfs2-1.2.9-8/fs/ocfs2/dlm/dlmthread.c 2010-06-26 18:44:14.000000000 +0800 @@ -156,29 +156,23 @@ void dlm_lockres_calc_usage(struct dlm_c spin_unlock(&dlm->spinlock); } -int dlm_purge_lockres(struct dlm_ctxt *dlm, struct dlm_lock_resource *res) +static void dlm_purge_lockres(struct dlm_ctxt *dlm, struct dlm_lock_resource *res) { int master; int ret = 0; - spin_lock(&res->spinlock); - if (!__dlm_lockres_unused(res)) { - __dlm_print_one_lock_resource(res); - spin_unlock(&res->spinlock); - mlog(0, "%s:%.*s: tried to purge but not unused\n", - dlm->name, res->lockname.len, res->lockname.name); - return -ENOTEMPTY; - } + assert_spin_locked(&dlm->spinlock); + assert_spin_locked(&res->spinlock); + master = (res->owner == dlm->node_num); - if (!master) - res->state |= DLM_LOCK_RES_DROPPING_REF; - spin_unlock(&res->spinlock); mlog(0, "purging lockres %.*s, master = %d\n", res->lockname.len, res->lockname.name, master); if (!master) { + res->state |= DLM_LOCK_RES_DROPPING_REF; /* drop spinlock... retake below */ + spin_unlock(&res->spinlock); spin_unlock(&dlm->spinlock); spin_lock(&res->spinlock); @@ -196,31 +190,35 @@ int dlm_purge_lockres(struct dlm_ctxt *d mlog(0, "%s:%.*s: dlm_deref_lockres returned %d\n", dlm->name, res->lockname.len, res->lockname.name, ret); spin_lock(&dlm->spinlock); + spin_lock(&res->spinlock); } - spin_lock(&res->spinlock); if (!list_empty(&res->purge)) { mlog(0, "removing lockres %.*s:%p from purgelist, " "master = %d\n", res->lockname.len, res->lockname.name, res, master); list_del_init(&res->purge); - spin_unlock(&res->spinlock); dlm_lockres_put(res); dlm->purge_count--; - } else { - spin_unlock(&res->spinlock); + } + + if (!__dlm_lockres_unused(res)) { + mlog(ML_ERROR, "found lockres %s:%.*s: in use after deref\n", + dlm->name, res->lockname.len, res->lockname.name); + __dlm_print_one_lock_resource(res); + BUG(); } + __dlm_unhash_lockres(res); /* lockres is not in the hash now. drop the flag and wake up * any processes waiting in dlm_get_lock_resource. */ if (!master) { - spin_lock(&res->spinlock); res->state &= ~DLM_LOCK_RES_DROPPING_REF; spin_unlock(&res->spinlock); wake_up(&res->wq); - } - return 0; + } else + spin_unlock(&res->spinlock); } static void dlm_run_purge_list(struct dlm_ctxt *dlm, @@ -239,18 +237,6 @@ static void dlm_run_purge_list(struct dl lockres = list_entry(dlm->purge_list.next, struct dlm_lock_resource, purge); - /* Status of the lockres *might* change so double - * check. If the lockres is unused, holding the dlm - * spinlock will prevent people from getting and more - * refs on it -- there's no need to keep the lockres - * spinlock. */ - spin_lock(&lockres->spinlock); - unused = __dlm_lockres_unused(lockres); - spin_unlock(&lockres->spinlock); - - if (!unused) - continue; - purge_jiffies = lockres->last_used + msecs_to_jiffies(DLM_PURGE_INTERVAL_MS); @@ -261,16 +247,28 @@ static void dlm_run_purge_list(struct dl * in tail order, we can stop at the first * unpurgable resource -- anyone added after * him will have a greater last_used value */ + spin_unlock(&lockres->spinlock); break; } - dlm_lockres_get(lockres); - - /* This may drop and reacquire the dlm spinlock if it - * has to do migration. */ - if (dlm_purge_lockres(dlm, lockres)) - BUG(); + /* Status of the lockres *might* change so double + * check. If the lockres is unused, holding the dlm + * spinlock will prevent people from getting and more + * refs on it. + */ + unused = __dlm_lockres_unused(lockres); + if (!unused || + (lockres->state & DLM_LOCK_RES_MIGRATING)) { + mlog(0, "lockres %s:%.*s: is in use or " + "being remastered\n", dlm->name, + lockres->lockname.len, lockres->lockname.name); + list_move_tail(&dlm->purge_list, &lockres->purge); + spin_unlock(&lockres->spinlock); + continue; + } + dlm_lockres_get(lockres); + dlm_purge_lockres(dlm, lockres); dlm_lockres_put(lockres); /* Avoid adding any scheduling latencies */ diff -urp ocfs2-1.2.9-7/fs/ocfs2/dlm/dlmcommon.h ocfs2-1.2.9-8/fs/ocfs2/dlm/dlmcommon.h --- ocfs2-1.2.9-7/fs/ocfs2/dlm/dlmcommon.h 2010-06-26 14:40:32.000000000 +0800 +++ ocfs2-1.2.9-8/fs/ocfs2/dlm/dlmcommon.h 2010-06-26 18:57:19.000000000 +0800 @@ -753,8 +753,6 @@ void __dlm_lockres_calc_usage(struct dlm struct dlm_lock_resource *res); void dlm_lockres_calc_usage(struct dlm_ctxt *dlm, struct dlm_lock_resource *res); -int dlm_purge_lockres(struct dlm_ctxt *dlm, - struct dlm_lock_resource *lockres); void dlm_lockres_get(struct dlm_lock_resource *res); void dlm_lockres_put(struct dlm_lock_resource *res); void __dlm_unhash_lockres(struct dlm_lock_resource *res); _______________________________________________ Ocfs2-devel mailing list [email protected] http://oss.oracle.com/mailman/listinfo/ocfs2-devel
