jenkins-bot has submitted this change and it was merged.
Change subject: Recover better from aggregator server network partitions
......................................................................
Recover better from aggregator server network partitions
Change-Id: Ia54f5803dcdb03f757809d86e51bd486f0351a3e
---
M redisJobRunnerService
1 file changed, 10 insertions(+), 5 deletions(-)
Approvals:
Ori.livneh: Looks good to me, approved
jenkins-bot: Verified
diff --git a/redisJobRunnerService b/redisJobRunnerService
index 6fb6515..1f08844 100755
--- a/redisJobRunnerService
+++ b/redisJobRunnerService
@@ -369,7 +369,8 @@
/**
* Recycle or destroy any jobs that have been claimed for too long
* and release any ready delayed jobs into the queue. Also abandon
- * and prune out jobs that failed too many times.
+ * and prune out jobs that failed too many times. This updates the
+ * aggregator server as necessary.
*
* @note: similar to JobQueueRedis.php periodic tasks method
* @return int|bool Number of jobs recycled/deleted/undelayed/abandoned (false if not run)
@@ -405,7 +406,7 @@
<<<LUA
local kClaimed, kAttempts, kUnclaimed, kData, kAbandoned,
kDelayed = unpack(KEYS)
local rClaimCutoff, rPruneCutoff, rAttempts, rTime =
unpack(ARGV)
- local released,abandoned,pruned,undelayed = 0,0,0,0
+ local released,abandoned,pruned,undelayed,ready = 0,0,0,0,0
-- Get all non-dead jobs that have an expired claim on them.
-- The score for each item is the last claim timestamp (UNIX).
local staleClaims =
redis.call('zRangeByScore',kClaimed,0,rClaimCutoff)
@@ -441,7 +442,8 @@
redis.call('zRem',kDelayed,id)
end
undelayed = #ids
- return {released,abandoned,pruned,undelayed}
+ ready = redis.call('lSize',kUnclaimed)
+ return {released,abandoned,pruned,undelayed,ready}
LUA;
$ok = true;
@@ -513,8 +515,11 @@
}
list( $qType, $qWiki ) =
$paramsByQueue[$i]['queue'];
$tasksRun += array_sum( $result
);
- list( $released, $abandoned,
$pruned, $undelayed ) = $result;
- if ( $released > 0 ||
$undelayed > 0 ) {
+ list( $released, $abandoned,
$pruned, $undelayed, $ready ) = $result;
+ if ( $released > 0 ||
$undelayed > 0 || $ready > 0 ) {
+ // Update the map in the aggregator as needed. This checks
+ // $ready to handle lost aggregator updates as well as to
+ // merge after network partitions that caused fail-over.
$mapSet[$this->encQueueName( $qType, $qWiki )] = $now;
}
$this->incrStats(
"job-recycle.$qType.$qWiki", $released );
--
To view, visit https://gerrit.wikimedia.org/r/167422
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: merged
Gerrit-Change-Id: Ia54f5803dcdb03f757809d86e51bd486f0351a3e
Gerrit-PatchSet: 2
Gerrit-Project: mediawiki/services/jobrunner
Gerrit-Branch: master
Gerrit-Owner: Aaron Schulz <[email protected]>
Gerrit-Reviewer: Ori.livneh <[email protected]>
Gerrit-Reviewer: jenkins-bot <>
_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits