------------------------------------------------------------
revno: 644
revision-id: [EMAIL PROTECTED]
parent: [EMAIL PROTECTED]
committer: Andrew Tridgell <[EMAIL PROTECTED]>
branch nick: tridge
timestamp: Fri 2007-10-05 12:01:40 +1000
message:
  we are the culprit if we can't get the reclock
modified:
  server/ctdb_recoverd.c         recoverd.c-20070503213540-bvxuyd9jm1f7ig90-1
=== modified file 'server/ctdb_recoverd.c'
--- a/server/ctdb_recoverd.c	2007-09-24 00:52:26 +0000
+++ b/server/ctdb_recoverd.c	2007-10-05 02:01:40 +0000
@@ -724,6 +724,23 @@
 
 	return generation;
 }
+
+/*
+  remember the trouble maker
+ */
+static void ctdb_set_culprit(struct ctdb_recoverd *rec, uint32_t culprit)
+{
+	struct ctdb_context *ctdb = rec->ctdb;
+
+	if (rec->last_culprit != culprit ||
+	    timeval_elapsed(&rec->first_recover_time) > ctdb->tunable.recovery_grace_period) {
+		/* either a new node is the culprit, or we've decide to forgive them */
+		rec->last_culprit = culprit;
+		rec->first_recover_time = timeval_current();
+		rec->culprit_counter = 0;
+	}
+	rec->culprit_counter++;
+}
 
 /*
   we are the recmaster, and recovery is needed - start a recovery run
@@ -741,14 +758,7 @@
 	/* if recovery fails, force it again */
 	rec->need_recovery = true;
 
-	if (rec->last_culprit != culprit ||
-	    timeval_elapsed(&rec->first_recover_time) > ctdb->tunable.recovery_grace_period) {
-		/* either a new node is the culprit, or we've decide to forgive them */
-		rec->last_culprit = culprit;
-		rec->first_recover_time = timeval_current();
-		rec->culprit_counter = 0;
-	}
-	rec->culprit_counter++;
+	ctdb_set_culprit(rec, culprit);
 
 	if (rec->culprit_counter > 2*nodemap->num) {
 		DEBUG(0,("Node %u has caused %u recoveries in %.0f seconds - banning it for %u seconds\n",
@@ -758,6 +768,7 @@
 	}
 
 	if (!ctdb_recovery_lock(ctdb, true)) {
+		ctdb_set_culprit(rec, pnn);
 		DEBUG(0,("Unable to get recovery lock - aborting recovery\n"));
 		return -1;
 	}