jenkins-bot has submitted this change and it was merged.

Change subject: Recover better from aggregator server network partitions
......................................................................


Recover better from aggregator server network partitions

Change-Id: Ia54f5803dcdb03f757809d86e51bd486f0351a3e
---
M redisJobRunnerService
1 file changed, 10 insertions(+), 5 deletions(-)

Approvals:
  Ori.livneh: Looks good to me, approved
  jenkins-bot: Verified



diff --git a/redisJobRunnerService b/redisJobRunnerService
index 6fb6515..1f08844 100755
--- a/redisJobRunnerService
+++ b/redisJobRunnerService
@@ -369,7 +369,8 @@
        /**
         * Recycle or destroy any jobs that have been claimed for too long
         * and release any ready delayed jobs into the queue. Also abandon
-        * and prune out jobs that failed too many times.
+        * and prune out jobs that failed too many times. This updates the
+        * aggregator server as necessary.
         *
         * @note: similar to JobQueueRedis.php periodic tasks method
         * @return int|bool Number of jobs recycled/deleted/undelayed/abandoned (false if not run)
@@ -405,7 +406,7 @@
 <<<LUA
                local kClaimed, kAttempts, kUnclaimed, kData, kAbandoned, 
kDelayed = unpack(KEYS)
                local rClaimCutoff, rPruneCutoff, rAttempts, rTime = 
unpack(ARGV)
-               local released,abandoned,pruned,undelayed = 0,0,0,0
+               local released,abandoned,pruned,undelayed,ready = 0,0,0,0,0
                -- Get all non-dead jobs that have an expired claim on them.
                -- The score for each item is the last claim timestamp (UNIX).
                local staleClaims = 
redis.call('zRangeByScore',kClaimed,0,rClaimCutoff)
@@ -441,7 +442,8 @@
                        redis.call('zRem',kDelayed,id)
                end
                undelayed = #ids
-               return {released,abandoned,pruned,undelayed}
+               ready = redis.call('lSize',kUnclaimed)
+               return {released,abandoned,pruned,undelayed,ready}
 LUA;
 
                $ok = true;
@@ -513,8 +515,11 @@
                                                }
                                                list( $qType, $qWiki ) = 
$paramsByQueue[$i]['queue'];
                                                $tasksRun += array_sum( $result 
);
-                                               list( $released, $abandoned, 
$pruned, $undelayed ) = $result;
-                                               if ( $released > 0 || 
$undelayed > 0 ) {
+                                               list( $released, $abandoned, 
$pruned, $undelayed, $ready ) = $result;
+                                               if ( $released > 0 || 
$undelayed > 0 || $ready > 0 ) {
+                                                       // Update the map in the aggregator as needed. This checks
+                                                       // $ready to handle lost aggregator updates as well as to
+                                                       // merge after network partitions that caused fail-over.
                                                        
$mapSet[$this->encQueueName( $qType, $qWiki )] = $now;
                                                }
                                                $this->incrStats( 
"job-recycle.$qType.$qWiki", $released );

-- 
To view, visit https://gerrit.wikimedia.org/r/167422
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: merged
Gerrit-Change-Id: Ia54f5803dcdb03f757809d86e51bd486f0351a3e
Gerrit-PatchSet: 2
Gerrit-Project: mediawiki/services/jobrunner
Gerrit-Branch: master
Gerrit-Owner: Aaron Schulz <[email protected]>
Gerrit-Reviewer: Ori.livneh <[email protected]>
Gerrit-Reviewer: jenkins-bot <>

_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to