jenkins-bot has submitted this change and it was merged. Change subject: Fails gracefully when running a job for an unknown cluster ......................................................................
Fails gracefully when running a job for an unknown cluster Added a check to ensure that the cluster is known. This can be useful in case of an emergency where the job queue has been flooded with failed jobs due to an unreliable cluster. We may be forced to remove this cluster from the config. Cirrus jobs created against this removed cluster will "gracefully" fails without attempting to retry. Without this patch an unknown cluster generates a connection to localhost and will probably fail with 3 retries. We should probably fail earlier but I'm not sure that the job runner will be happy to receive an exception when calling the job constructor. Change-Id: I418473a2f81bdb012897584028d7342feed588d3 (cherry picked from commit 7e3fd9a5dec37bd05bca13b9a74933d1b994a7ba) --- M includes/Job/ElasticaWrite.php M includes/Job/Job.php 2 files changed, 36 insertions(+), 0 deletions(-) Approvals: EBernhardson: Looks good to me, approved jenkins-bot: Verified diff --git a/includes/Job/ElasticaWrite.php b/includes/Job/ElasticaWrite.php index caaeceb..b3f034f 100644 --- a/includes/Job/ElasticaWrite.php +++ b/includes/Job/ElasticaWrite.php @@ -43,6 +43,23 @@ protected function decideClusters() { $config = ConfigFactory::getDefaultInstance()->makeConfig( 'CirrusSearch' ); + if ( $this->params['cluster'] !== null && + $config->getElement( 'CirrusSearchClusters', $this->params['cluster'] ) === null ) { + // Just in case a job is present in the queue but its cluster + // has been removed from the config file. + $cluster = $this->params['cluster']; + LoggerFactory::getInstance( 'CirrusSearch' )->warning( + "Received job {method} for unknown cluster {cluster} {diff}s after insertion", + array( + 'method' => $this->params['method'], + 'arguments' => $this->params['arguments'], + 'diff' => time() - $this->params['createdAt'], + 'cluster' => $cluster + ) + ); + $this->setAllowRetries( false ); + throw new \RuntimeException( "Received job for unknown cluster $cluster." ); + } if ( $this->params['cluster'] !== null ) { // parent::__construct initialized the correct connection $name = $this->connection->getClusterName(); diff --git a/includes/Job/Job.php b/includes/Job/Job.php index 4d3a730..5b05181 100644 --- a/includes/Job/Job.php +++ b/includes/Job/Job.php @@ -32,6 +32,11 @@ */ protected $connection; + /** + * @var boolean should we retry if this job failed + */ + private $allowRetries = true; + public function __construct( $title, $params ) { $params += array( 'cluster' => null ); // eg: DeletePages -> cirrusSearchDeletePages @@ -124,4 +129,18 @@ * Actually perform the labor of the job */ abstract protected function doJob(); + + /** + * {@inheritDoc} + */ + public function allowRetries() { + return $this->allowRetries; + } + + /** + * @param boolena $allowRetries set wether should be retried if it fails + */ + protected function setAllowRetries( $allowRetries ) { + $this->allowRetries = $allowRetries; + } } -- To view, visit https://gerrit.wikimedia.org/r/247347 To unsubscribe, visit https://gerrit.wikimedia.org/r/settings Gerrit-MessageType: merged Gerrit-Change-Id: I418473a2f81bdb012897584028d7342feed588d3 Gerrit-PatchSet: 1 Gerrit-Project: mediawiki/extensions/CirrusSearch Gerrit-Branch: wmf/1.27.0-wmf.2 Gerrit-Owner: EBernhardson <ebernhard...@wikimedia.org> Gerrit-Reviewer: Chad <ch...@wikimedia.org> Gerrit-Reviewer: DCausse <dcau...@wikimedia.org> Gerrit-Reviewer: EBernhardson <ebernhard...@wikimedia.org> Gerrit-Reviewer: Manybubbles <never...@wikimedia.org> Gerrit-Reviewer: Smalyshev <smalys...@wikimedia.org> Gerrit-Reviewer: jenkins-bot <> _______________________________________________ MediaWiki-commits mailing list MediaWiki-commits@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits